This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch matchJava4.0.0
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git
The following commit(s) were added to refs/heads/matchJava4.0.0 by this push:
new 080028e Remove obsolete tests
080028e is described below
commit 080028e65cf9f0581a56d2cf294b631fcec50d6a
Author: Lee Rhodes <[email protected]>
AuthorDate: Thu Oct 19 12:55:05 2023 -0700
Remove obsolete tests
---
pom.xml | 35 ++--
src/main/java/org/apache/datasketches/Job.java | 4 +-
.../java/org/apache/datasketches/SpacedPoints.java | 2 +
.../hll/DruidHllAccuracyProfile.java | 74 -------
.../hll/DruidHllMergeAccuracyProfile.java | 85 --------
.../hll/HllConfidenceIntervalInverseProfile.java | 2 +-
.../hll/ZetaHllAccuracyProfile.java | 85 --------
.../hll/ZetaHllMergeAccuracyProfile.java | 97 ---------
.../hll/ZetaHllMergeSpeedProfile.java | 89 --------
.../characterization/hll/ZetaHllSerDeProfile.java | 107 ----------
.../hll/ZetaHllUnionUpdateSpeedProfile.java | 79 -------
.../hll/ZetaHllUpdateSpeedProfile.java | 80 -------
...llDoublesSketchRankGaussianAccuracyProfile.java | 8 +-
...KllFloatsSketchRankGaussianAccuracyProfile.java | 2 +-
.../quantiles/DoublesSketchSpeedProfile.java | 35 ++--
.../quantiles/DruidAppHistStreamAProfile.java | 230 ---------------------
.../quantiles/ItemsSketchSpeedProfile.java | 52 +++--
.../quantiles/MSketchStreamAProfile.java | 203 ------------------
.../quantiles/tdigest/DataGenerator.java | 81 --------
.../quantiles/tdigest/DoubleRankCalculator.java | 53 -----
.../tdigest/QuantilesAccuracyProfile.java | 79 -------
.../quantiles/tdigest/QuantilesSpeedProfile.java | 86 --------
.../quantiles/tdigest/TDigestAccuracyProfile.java | 83 --------
.../quantiles/tdigest/TDigestSpeedProfile.java | 167 ---------------
.../quantiles/tdigest/package-info.java | 24 ---
.../req/ReqSketchAccuracyProfile.java | 4 +-
.../req/ReqSketchAccuracyProfile2.java | 2 +-
.../characterization/req/TrueFloatRanks.java | 2 +-
.../characterization/theta/ThetaSerDeProfile.java | 3 -
.../concurrent/ConcurrentThetaAccuracyProfile.java | 2 +-
.../ConcurrentThetaMultithreadedSpeedProfile.java | 2 +-
.../ConcurrentThetaUpdateSpeedProfile.java | 2 +-
.../uniquecount/BaseAccuracyProfile.java | 2 +-
.../uniquecount/BaseBoundsAccuracyProfile.java | 2 +-
.../resources/hll/druid/DruidHllAccuracyJob.conf | 44 ----
.../hll/druid/DruidHllMergeAccuracyJob.conf | 22 --
.../resources/hll/zeta/ZetaHllAccuracyJob.conf | 46 -----
.../hll/zeta/ZetaHllMergeAccuracyJob.conf | 37 ----
.../resources/hll/zeta/ZetaHllMergeSpeedJob.conf | 39 ----
src/main/resources/hll/zeta/ZetaHllSerDeJob.conf | 43 ----
src/main/resources/hll/zeta/ZetaHllSpeedJob.conf | 43 ----
.../hll/zeta/ZetaHllUnionUpdateSpeedJob.conf | 42 ----
.../resources/quantiles/DruidAHStreamAJob.conf | 38 ----
.../resources/quantiles/MSketchStreamAJob.conf | 39 ----
.../resources/quantiles/TDigestAccuracyJob.conf | 27 ---
src/main/resources/quantiles/TDigestSpeedJob.conf | 27 ---
.../apache/datasketches/MonotonicPointsTest.java | 3 +-
47 files changed, 89 insertions(+), 2224 deletions(-)
diff --git a/pom.xml b/pom.xml
index 014870a..7dfbeec 100644
--- a/pom.xml
+++ b/pom.xml
@@ -83,8 +83,8 @@ under the License.
<properties>
<!-- UNIQUE FOR THIS JAVA COMPONENT -->
- <datasketches-memory.version>2.0.0</datasketches-memory.version>
- <datasketches-java.version>3.2.0</datasketches-java.version>
+ <datasketches-memory.version>2.2.0</datasketches-memory.version>
+ <datasketches-java.version>4.1.0</datasketches-java.version>
<druid-momentsketch.version>0.16.0-incubating</druid-momentsketch.version>
<druid-histogram.version>0.16.0-incubating</druid-histogram.version>
<druid-hll.version>0.16.0-incubating</druid-hll.version>
@@ -93,7 +93,7 @@ under the License.
<!-- END:UNIQUE FOR THIS JAVA COMPONENT -->
<!-- Test -->
- <testng.version>7.4.0</testng.version>
+ <testng.version>7.5.1</testng.version>
<!-- System-wide properties -->
<maven.version>3.5.0</maven.version>
@@ -105,6 +105,7 @@ under the License.
<project.build.sourceEncoding>${charset.encoding}</project.build.sourceEncoding>
<project.build.resourceEncoding>${charset.encoding}</project.build.resourceEncoding>
<project.reporting.outputEncoding>${charset.encoding}</project.reporting.outputEncoding>
+
<maven.build.timestamp.format>yyyy-MM-dd'T'HH-mm-ss'Z'</maven.build.timestamp.format>
<!-- org.codehaus plugins -->
<!-- used for strict profile testing-->
@@ -115,14 +116,15 @@ under the License.
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version> <!-- overrides parent -->
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version> <!-- overrides parent -->
<maven-deploy-plugin.version>3.0.0-M1</maven-deploy-plugin.version> <!-- overrides parent -->
- <maven-enforcer-plugin.version>3.0.0-M2</maven-enforcer-plugin.version> <!-- overrides parent -->
- <maven-gpg-plugin.version>3.0.1</maven-gpg-plugin.version> <!-- overrides parent -->
+ <maven-enforcer-plugin.version>3.0.0</maven-enforcer-plugin.version> <!-- overrides parent -->
+ <maven-failsafe-plugin.version>3.1.2</maven-failsafe-plugin.version>
+ <maven-gpg-plugin.version>3.1.0</maven-gpg-plugin.version> <!-- overrides parent -->
<maven-jar-plugin.version>3.2.0</maven-jar-plugin.version> <!-- overrides parent -->
<maven-javadoc-plugin.version>3.3.1</maven-javadoc-plugin.version> <!-- overrides parent -->
<maven-release-plugin.version>3.0.0-M4</maven-release-plugin.version> <!-- overrides parent -->
<maven-remote-resources-plugin.version>[1.7.0,)</maven-remote-resources-plugin.version> <!-- overrides parent -->
<maven-source-plugin.version>3.2.1</maven-source-plugin.version> <!-- overrides parent -->
- <maven-surefire-plugin.version>3.0.0-M5</maven-surefire-plugin.version> <!-- overrides parent -->
+ <maven-surefire-plugin.version>3.1.2</maven-surefire-plugin.version> <!-- overrides parent -->
<!-- Apache Plugins -->
<apache-rat-plugin.version>0.13</apache-rat-plugin.version> <!-- overrides parent -->
<!-- org.jacoco Maven Plugins -->
@@ -149,40 +151,41 @@ under the License.
<version>${datasketches-java.version}</version>
</dependency>
- <!-- Druid HLL Sketch -->
+
+ <!-- Druid HLL Sketch
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-hll</artifactId>
<version>${druid-hll.version}</version>
- </dependency>
+ </dependency> -->
- <!-- Druid Moment Sketch -->
+ <!-- Druid Moment Sketch
<dependency>
<groupId>org.apache.druid.extensions.contrib</groupId>
<artifactId>druid-momentsketch</artifactId>
<version>${druid-momentsketch.version}</version>
- </dependency>
+ </dependency> -->
- <!-- Druid Histogram -->
+ <!-- Druid Histogram
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-histogram</artifactId>
<version>${druid-histogram.version}</version>
- </dependency>
+ </dependency> -->
- <!-- ZetaSketch -->
+ <!-- ZetaSketch
<dependency>
<groupId>com.google.zetasketch</groupId>
<artifactId>zetasketch</artifactId>
<version>${zetasketch.version}</version>
- </dependency>
+ </dependency> -->
- <!-- t-Digest -->
+ <!-- t-Digest
<dependency>
<groupId>com.tdunning</groupId>
<artifactId>t-digest</artifactId>
<version>${t-digest.version}</version>
- </dependency>
+ </dependency> -->
<!-- Dependency on Test code -->
<dependency>
diff --git a/src/main/java/org/apache/datasketches/Job.java b/src/main/java/org/apache/datasketches/Job.java
index 0b7ce29..97df47e 100644
--- a/src/main/java/org/apache/datasketches/Job.java
+++ b/src/main/java/org/apache/datasketches/Job.java
@@ -21,8 +21,8 @@ package org.apache.datasketches;
import static org.apache.datasketches.Files.isFileValid;
import static org.apache.datasketches.Files.openPrintWriter;
-import static org.apache.datasketches.Util.getResourcePath;
-import static org.apache.datasketches.Util.milliSecToString;
+import static org.apache.datasketches.common.Util.getResourcePath;
+import static org.apache.datasketches.common.Util.milliSecToString;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
diff --git a/src/main/java/org/apache/datasketches/SpacedPoints.java b/src/main/java/org/apache/datasketches/SpacedPoints.java
index 2bbdc33..29a1cdc 100644
--- a/src/main/java/org/apache/datasketches/SpacedPoints.java
+++ b/src/main/java/org/apache/datasketches/SpacedPoints.java
@@ -24,6 +24,8 @@ import static java.lang.Math.exp;
import static java.lang.Math.log;
import static java.lang.Math.pow;
+import org.apache.datasketches.common.SketchesArgumentException;
+
/**
* @author Lee Rhodes
*/
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/DruidHllAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/DruidHllAccuracyProfile.java
deleted file mode 100644
index ed2259d..0000000
--- a/src/main/java/org/apache/datasketches/characterization/hll/DruidHllAccuracyProfile.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.hll;
-
-import org.apache.datasketches.characterization.AccuracyStats;
-import org.apache.datasketches.characterization.uniquecount.BaseAccuracyProfile;
-import org.apache.druid.hll.HyperLogLogCollector;
-import org.apache.druid.hll.HyperLogLogHash;
-
-public class DruidHllAccuracyProfile extends BaseAccuracyProfile {
-
- private static final HyperLogLogHash hash = HyperLogLogHash.getDefault();
- private static final byte[] bytes = new byte[8]; // for key conversion
-
- private HyperLogLogCollector sketch;
- private boolean useString;
-
- @Override
- public void configure() {
- final String useStringStr = prop.get("Trials_string");
- useString = (useStringStr == null) ? false : Boolean.parseBoolean(useStringStr);
- }
-
- @Override
- public void doTrial() {
- final int qArrLen = qArr.length;
- sketch = HyperLogLogCollector.makeLatestCollector();
- long lastUniques = 0;
- for (int i = 0; i < qArrLen; i++) {
- final AccuracyStats q = qArr[i];
- final long delta = (long)(q.trueValue - lastUniques);
- for (long u = 0; u < delta; u++) {
- if (useString) {
- final String vstr = Long.toHexString(++vIn);
- sketch.add(hash.hash(vstr));
- } else {
- longToByteArray(++vIn, bytes);
- sketch.add(hash.hash(bytes));
- }
- }
- lastUniques += delta;
- final double est = sketch.estimateCardinality();
- q.update(est);
- if (getSize) {
- q.bytes = sketch.toByteArray().length;
- }
- }
- }
-
- static void longToByteArray(long value, final byte[] bytes) {
- for (int i = 7; i >= 0; i--) {
- bytes[i] = (byte) value;
- value >>>= 8;
- }
- }
-
-}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/DruidHllMergeAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/DruidHllMergeAccuracyProfile.java
deleted file mode 100644
index 74c4a43..0000000
--- a/src/main/java/org/apache/datasketches/characterization/hll/DruidHllMergeAccuracyProfile.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.hll;
-
-import java.util.Random;
-
-import org.apache.datasketches.Job;
-import org.apache.datasketches.JobProfile;
-import org.apache.druid.hll.HyperLogLogCollector;
-import org.apache.druid.hll.HyperLogLogHash;
-
-public class DruidHllMergeAccuracyProfile implements JobProfile {
-
- private static final Random random = new Random();
-
- private static final HyperLogLogHash hash = HyperLogLogHash.getDefault();
- private static final byte[] bytes = new byte[8]; // for key conversion
-
- private Job job;
-
- @Override
- public void start(final Job job) {
- this.job = job;
- runMergeTrials();
- }
-
- @Override
- public void shutdown() { }
-
- @Override
- public void cleanup() { }
-
- private void runMergeTrials() {
- long key = random.nextLong();
-
- final int numTrials = Integer.parseInt(job.getProperties().mustGet("numTrials"));
- final int numSketches = Integer.parseInt(job.getProperties().mustGet("numSketches"));
- final int distinctKeysPerSketch = Integer.parseInt(job.getProperties().mustGet("distinctKeysPerSketch"));
- final double trueCount = numSketches * distinctKeysPerSketch;
- double sumEstimates = 0;
- double sumOfSquaredDeviationsFromTrueCount = 0;
-
- for (int t = 0; t < numTrials; t++) {
- final HyperLogLogCollector union = HyperLogLogCollector.makeLatestCollector();
-
- for (int s = 0; s < numSketches; s++) {
- final HyperLogLogCollector sketch = HyperLogLogCollector.makeLatestCollector();
- for (int k = 0; k < distinctKeysPerSketch; k++) {
- DruidHllAccuracyProfile.longToByteArray(key++, bytes);
- sketch.add(hash.hash(bytes));
- }
- union.fold(sketch);
- }
- final double estimatedCount = union.estimateCardinality();
- sumEstimates += estimatedCount;
- sumOfSquaredDeviationsFromTrueCount += (estimatedCount - trueCount) * (estimatedCount - trueCount);
- }
- final double meanEstimate = sumEstimates / numTrials;
- final double meanRelativeError = meanEstimate / trueCount - 1;
- final double relativeStandardError
- = Math.sqrt(sumOfSquaredDeviationsFromTrueCount / numTrials) / trueCount;
- job.println("True count: " + trueCount);
- job. println("Mean estimate: " + meanEstimate);
- job.println("Mean Relative Error: " + meanRelativeError);
- job.println("Relative Standard Error: " + relativeStandardError);
- }
-
-}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/HllConfidenceIntervalInverseProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/HllConfidenceIntervalInverseProfile.java
index e0975eb..36fd126 100644
--- a/src/main/java/org/apache/datasketches/characterization/hll/HllConfidenceIntervalInverseProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/hll/HllConfidenceIntervalInverseProfile.java
@@ -22,7 +22,7 @@ package org.apache.datasketches.characterization.hll;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD;
-import static org.apache.datasketches.Util.milliSecToString;
+import static org.apache.datasketches.common.Util.milliSecToString;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
import java.io.PrintWriter;
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllAccuracyProfile.java
deleted file mode 100644
index c7f64c2..0000000
--- a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllAccuracyProfile.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.hll;
-
-import org.apache.datasketches.characterization.AccuracyStats;
-import org.apache.datasketches.characterization.uniquecount.BaseAccuracyProfile;
-
-import com.google.zetasketch.HyperLogLogPlusPlus;
-
-/**
- * @author Lee Rhodes
- */
-public class ZetaHllAccuracyProfile extends BaseAccuracyProfile {
- private enum ZetaType { LONG, INTEGER, STRING, BYTES }
-
- private HyperLogLogPlusPlus<?> sketch;
- private HyperLogLogPlusPlus.Builder hllBuilder;
- private int lgSP;
- private String zetaType;
- private ZetaType zType;
-
- @Override
- public void configure() {
- final int lgK = Integer.parseInt(prop.mustGet("LgK"));
- lgSP = Integer.parseInt(prop.mustGet("LgSP"));
- zetaType = prop.mustGet("ZetaType");
- hllBuilder = new HyperLogLogPlusPlus.Builder();
- hllBuilder.normalPrecision(lgK);
- hllBuilder.sparsePrecision(lgSP);
- if (zetaType.equals("LONG")) {
- zType = ZetaType.LONG;
- } else if (zetaType.equals("INTEGER")) {
- zType = ZetaType.INTEGER;
- } else if (zetaType.equals("STRING")) {
- zType = ZetaType.STRING;
- } else if (zetaType.equals("BYTES")) {
- zType = ZetaType.BYTES;
- }
- reset();
- }
-
- private void reset() {
- switch (zType) {
- case LONG: sketch = hllBuilder.buildForLongs(); break;
- case INTEGER: sketch = hllBuilder.buildForIntegers(); break;
- case STRING: sketch = hllBuilder.buildForStrings(); break;
- case BYTES: sketch = hllBuilder.buildForBytes(); break;
- }
- }
-
- @Override
- public void doTrial() {
- final int qArrLen = qArr.length;
- reset();
- long lastUniques = 0;
- for (int i = 0; i < qArrLen; i++) {
- final AccuracyStats q = qArr[i];
- final long delta = (long)(q.trueValue - lastUniques);
- for (long u = 0; u < delta; u++) {
- sketch.add(++vIn);
- }
- lastUniques += delta;
- final double est = sketch.result();
- q.update(est);
- }
- }
-
-}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeAccuracyProfile.java
deleted file mode 100644
index 3324944..0000000
--- a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeAccuracyProfile.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.hll;
-
-import java.util.Random;
-
-import org.apache.datasketches.Job;
-import org.apache.datasketches.JobProfile;
-
-import com.google.zetasketch.HyperLogLogPlusPlus;
-
-/**
- * @author Lee Rhodes
- */
-public class ZetaHllMergeAccuracyProfile implements JobProfile {
- private HyperLogLogPlusPlus.Builder hllBuilder = new HyperLogLogPlusPlus.Builder();
- private HyperLogLogPlusPlus<Long> target;
-
- private static final Random random = new Random();
-
- private Job job;
- private int lgK;
- private int numTrials;
- private int numSketches;
- private int distinctKeysPerSketch;
-
- @Override
- public void start(final Job job) {
- this.job = job;
- lgK = Integer.parseInt(job.getProperties().mustGet("lgK"));
- numTrials = Integer.parseInt(job.getProperties().mustGet("numTrials"));
- numSketches = Integer.parseInt(job.getProperties().mustGet("numSketches"));
- distinctKeysPerSketch = Integer.parseInt(job.getProperties().mustGet("distinctKeysPerSketch"));
- runMergeTrials();
- }
-
- private HyperLogLogPlusPlus<Long> newSketch(final int lgK) {
- final int lgSP = Math.min(lgK + 5, 25);
- hllBuilder.normalPrecision(lgK);
- hllBuilder.sparsePrecision(lgSP);
- return hllBuilder.buildForLongs();
- }
-
- @Override
- public void shutdown() { }
-
- @Override
- public void cleanup() { }
-
- private void runMergeTrials() {
- long key = random.nextLong();
-
- final double trueCount = numSketches * distinctKeysPerSketch;
- double sumEstimates = 0;
- double sumOfSquaredDeviationsFromTrueCount = 0;
-
- for (int t = 0; t < numTrials; t++) {
- target = newSketch(lgK);
-
- for (int s = 0; s < numSketches; s++) {
- final HyperLogLogPlusPlus<Long> sketch = newSketch(lgK);
- for (int k = 0; k < distinctKeysPerSketch; k++) {
- sketch.add(key++);
- }
- target.merge(sketch);
- }
- final double estimatedCount = target.result();
- sumEstimates += estimatedCount;
- sumOfSquaredDeviationsFromTrueCount += (estimatedCount - trueCount) * (estimatedCount - trueCount);
- }
- final double meanEstimate = sumEstimates / numTrials;
- final double meanRelativeError = meanEstimate / trueCount - 1;
- final double relativeStandardError
- = Math.sqrt(sumOfSquaredDeviationsFromTrueCount / numTrials) / trueCount;
- job.println("True count: " + trueCount);
- job.println("Mean estimate: " + meanEstimate);
- job.println("Mean Relative Error: " + meanRelativeError);
- job.println("Relative Standard Error: " + relativeStandardError);
- }
-}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeSpeedProfile.java
deleted file mode 100644
index 4dc4eff..0000000
--- a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeSpeedProfile.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.hll;
-
-import org.apache.datasketches.characterization.uniquecount.BaseMergeSpeedProfile;
-
-import com.google.zetasketch.HyperLogLogPlusPlus;
-
-/**
- * @author Lee Rhodes
- */
-public class ZetaHllMergeSpeedProfile extends BaseMergeSpeedProfile {
- private HyperLogLogPlusPlus.Builder hllBuilder;
- private HyperLogLogPlusPlus<Long> target;
-
- @Override
- public void configure() {
- hllBuilder = new HyperLogLogPlusPlus.Builder();
- }
-
- @Override
- public void resetMerge(final int lgK) {
- target = newSketch(lgK);
- }
-
- private HyperLogLogPlusPlus<Long> newSketch(final int lgK) {
- final int lgSP = Math.min(lgK + 5, 25);
- hllBuilder.normalPrecision(lgK);
- hllBuilder.sparsePrecision(lgSP);
- return hllBuilder.buildForLongs();
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public void doTrial(final Stats stats, final int lgK, final int lgDeltaU) {
- final int U = 1 << (lgK + lgDeltaU);
- long start;
- long serTime_nS = 0;
- long deserTime_nS = 0;
- long mergeTime_nS = 0;
- final HyperLogLogPlusPlus<Long> source = newSketch(lgK);
- for (int u = 0; u < U; u++) { source.add(++vIn); }
- final HyperLogLogPlusPlus<Long> source2;
-
- if (serDe) {
- //Serialize
- start = System.nanoTime();
- final byte[] byteArr = source.serializeToByteArray();
- serTime_nS += System.nanoTime() - start;
- //Deserialize
- start = System.nanoTime();
- source2 = (HyperLogLogPlusPlus<Long>) HyperLogLogPlusPlus.forProto(byteArr);
- deserTime_nS += System.nanoTime() - start;
- //Merge
- start = System.nanoTime();
- target.merge(source2);
- mergeTime_nS += System.nanoTime() - start;
-
- } else {
- //Merge
- start = System.nanoTime();
- target.merge(source);
- mergeTime_nS += System.nanoTime() - start;
- }
-
- stats.serializeTime_nS = serTime_nS;
- stats.deserializeTime_nS = deserTime_nS;
- stats.mergeTime_nS = mergeTime_nS;
- stats.totalTime_nS = deserTime_nS + mergeTime_nS;
- }
-
-}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllSerDeProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllSerDeProfile.java
deleted file mode 100644
index 50dac8a..0000000
--- a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllSerDeProfile.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.hll;
-
-import org.apache.datasketches.characterization.uniquecount.BaseSerDeProfile;
-
-import com.google.zetasketch.HyperLogLogPlusPlus;
-
-/**
- * @author Lee Rhodes
- */
-public class ZetaHllSerDeProfile extends BaseSerDeProfile {
- private enum ZetaType { LONG, INTEGER, STRING, BYTES }
-
- private HyperLogLogPlusPlus<?> sketch1;
- private HyperLogLogPlusPlus<?> sketch2;
- private HyperLogLogPlusPlus.Builder hllBuilder;
-
- private int lgSP;
- private String zetaType;
- private ZetaType zType;
-
- @Override
- public void configure() {
- lgSP = Integer.parseInt(prop.mustGet("LgSP"));
- zetaType = prop.mustGet("ZetaType");
- hllBuilder = new HyperLogLogPlusPlus.Builder();
- hllBuilder.normalPrecision(lgK);
- hllBuilder.sparsePrecision(lgSP);
- if (zetaType.equals("LONG")) {
- zType = ZetaType.LONG;
- } else if (zetaType.equals("INTEGER")) {
- zType = ZetaType.INTEGER;
- } else if (zetaType.equals("STRING")) {
- zType = ZetaType.STRING;
- } else if (zetaType.equals("BYTES")) {
- zType = ZetaType.BYTES;
- }
- reset();
- }
-
- private void reset() {
- switch (zType) {
- case LONG:
- sketch1 = hllBuilder.buildForLongs();
- sketch2 = hllBuilder.buildForLongs();
- break;
- case INTEGER:
- sketch1 = hllBuilder.buildForIntegers();
- sketch2 = hllBuilder.buildForIntegers();
- break;
- case STRING:
- sketch1 = hllBuilder.buildForStrings();
- sketch2 = hllBuilder.buildForStrings();
- break;
- case BYTES:
- sketch1 = hllBuilder.buildForBytes();
- sketch2 = hllBuilder.buildForBytes();
- break;
- }
- }
-
- @Override
- public void doTrial(final long[] stats, final int uPerTrial) {
- reset();
-
- for (int u = uPerTrial; u-- > 0;) {
- sketch1.add(++vIn);
- }
-
- final long startEstTime_nS = System.nanoTime();
- final double est1 = sketch1.result();
-
- final long startSerTime_nS = System.nanoTime();
- final byte[] byteArr = sketch1.serializeToByteArray();
-
- final long startDeSerTime_nS = System.nanoTime();
- sketch2 = HyperLogLogPlusPlus.forProto(byteArr);
- final long endTime_nS = System.nanoTime();
-
- final double est2 = sketch2.result();
- assert est1 == est2;
-
- stats[est_ns] = startSerTime_nS - startEstTime_nS;
- stats[ser_ns] = startDeSerTime_nS - startSerTime_nS;
- stats[deser_ns] = endTime_nS - startDeSerTime_nS;
- stats[size_bytes] = byteArr.length;
- }
-
-}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUnionUpdateSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUnionUpdateSpeedProfile.java
deleted file mode 100644
index 79ff798..0000000
--- a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUnionUpdateSpeedProfile.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.hll;
-
-import java.lang.reflect.Array;
-
-import org.apache.datasketches.characterization.uniquecount.BaseUpdateSpeedProfile;
-
-import com.google.zetasketch.HyperLogLogPlusPlus;
-
-/**
- * @author Lee Rhodes
- */
-public class ZetaHllUnionUpdateSpeedProfile extends BaseUpdateSpeedProfile {
- private int lgK;
- private int numSketches;
- private HyperLogLogPlusPlus.Builder hllBuilder;
- private HyperLogLogPlusPlus<Long>[] sketches;
- private HyperLogLogPlusPlus<Long> union;
-
- @SuppressWarnings("unchecked")
- @Override
- public void configure() {
- lgK = Integer.parseInt(prop.mustGet("LgK"));
- numSketches = Integer.parseInt(prop.mustGet("NumSketches"));
- hllBuilder = new HyperLogLogPlusPlus.Builder();
- sketches = (HyperLogLogPlusPlus<Long>[]) Array.newInstance(HyperLogLogPlusPlus.class, numSketches);
- }
-
- @Override
- public double doTrial(final int uPerTrial) {
- for (int i = 0; i < numSketches; i++) {
- sketches[i] = newSketch(lgK);
- }
-
- { // spray values across all sketches
- int i = 0;
- for (int u = uPerTrial; u-- > 0;) {
- sketches[i++].add(++vIn);
- if (i == numSketches) { i = 0; }
- }
- }
-
- union = newSketch(lgK);
- final long startUpdateTime_nS = System.nanoTime();
-
- for (int i = numSketches; i-- > 0;) {
- union.merge(sketches[i]);
- }
-
- final long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
- return updateTime_nS;
- }
-
- private HyperLogLogPlusPlus<Long> newSketch(final int lgK) {
- final int lgSP = Math.min(lgK + 5, 25);
- hllBuilder.normalPrecision(lgK);
- hllBuilder.sparsePrecision(lgSP);
- return hllBuilder.buildForLongs();
- }
-
-}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUpdateSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUpdateSpeedProfile.java
deleted file mode 100644
index 14b730c..0000000
--- a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUpdateSpeedProfile.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.hll;
-
-import org.apache.datasketches.characterization.uniquecount.BaseUpdateSpeedProfile;
-
-import com.google.zetasketch.HyperLogLogPlusPlus;
-
-/**
- * @author Lee Rhodes
- */
-public class ZetaHllUpdateSpeedProfile extends BaseUpdateSpeedProfile {
- private enum ZetaType { LONG, INTEGER, STRING, BYTES }
-
- private HyperLogLogPlusPlus<?> sketch;
- private HyperLogLogPlusPlus.Builder hllBuilder;
- private int lgK;
- private int lgSP;
- private String zetaType;
- private ZetaType zType;
-
- @Override
- public void configure() {
- lgK = Integer.parseInt(prop.mustGet("LgK"));
- lgSP = Integer.parseInt(prop.mustGet("LgSP"));
- zetaType = prop.mustGet("ZetaType");
- hllBuilder = new HyperLogLogPlusPlus.Builder();
- hllBuilder.normalPrecision(lgK);
- hllBuilder.sparsePrecision(lgSP);
- if (zetaType.equals("LONG")) {
- zType = ZetaType.LONG;
- } else if (zetaType.equals("INTEGER")) {
- zType = ZetaType.INTEGER;
- } else if (zetaType.equals("STRING")) {
- zType = ZetaType.STRING;
- } else if (zetaType.equals("BYTES")) {
- zType = ZetaType.BYTES;
- }
- reset();
- }
-
- private void reset() {
- switch (zType) {
- case LONG: sketch = hllBuilder.buildForLongs(); break;
- case INTEGER: sketch = hllBuilder.buildForIntegers(); break;
- case STRING: sketch = hllBuilder.buildForStrings(); break;
- case BYTES: sketch = hllBuilder.buildForBytes(); break;
- }
- }
-
- @Override
- public double doTrial(final int uPerTrial) {
- reset();
- final long startUpdateTime_nS = System.nanoTime();
-
- for (int u = uPerTrial; u-- > 0;) {
- sketch.add(++vIn);
- }
- final long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
- return (double) updateTime_nS / uPerTrial;
- }
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/kll/KllDoublesSketchRankGaussianAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/kll/KllDoublesSketchRankGaussianAccuracyProfile.java
index f788548..6623a1d 100644
---
a/src/main/java/org/apache/datasketches/characterization/kll/KllDoublesSketchRankGaussianAccuracyProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/kll/KllDoublesSketchRankGaussianAccuracyProfile.java
@@ -21,8 +21,8 @@ package org.apache.datasketches.characterization.kll;
import static java.lang.Math.round;
import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD;
-import static org.apache.datasketches.Util.evenlySpaced;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles;
import org.apache.datasketches.Job;
import org.apache.datasketches.JobProfile;
@@ -173,12 +173,12 @@ public class KllDoublesSketchRankGaussianAccuracyProfile
implements JobProfile {
for (int sl = 1; sl <= streamLength; sl++) { stream[sl - 1] = sl; } //1 to SL
//compute the true values used at the plot points
- final double start = 1.0f;
+ final double start = 1.0;
final double end = streamLength;
- final double[] fltValues = evenlySpaced(start, end, numPlotPoints);
+ final double[] dblValues = evenlySpacedDoubles(start, end, numPlotPoints);
for (int pp = 0; pp < numPlotPoints; pp++) {
- trueValues[pp] = round(fltValues[pp]);
+ trueValues[pp] = round(dblValues[pp]);
corrTrueValues[pp] = trueValues[pp] - trueValueCorrection;
}
diff --git
a/src/main/java/org/apache/datasketches/characterization/kll/KllFloatsSketchRankGaussianAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/kll/KllFloatsSketchRankGaussianAccuracyProfile.java
index f67892f..c9cc92e 100644
---
a/src/main/java/org/apache/datasketches/characterization/kll/KllFloatsSketchRankGaussianAccuracyProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/kll/KllFloatsSketchRankGaussianAccuracyProfile.java
@@ -21,8 +21,8 @@ package org.apache.datasketches.characterization.kll;
import static java.lang.Math.round;
import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD;
-import static org.apache.datasketches.Util.evenlySpacedFloats;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedFloats;
import org.apache.datasketches.Job;
import org.apache.datasketches.JobProfile;
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/DoublesSketchSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/DoublesSketchSpeedProfile.java
index a6d2be2..3ae2934 100644
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/DoublesSketchSpeedProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/quantiles/DoublesSketchSpeedProfile.java
@@ -34,9 +34,9 @@ public class DoublesSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
private static final Random rnd = new Random();
private int k;
private DoublesSketchBuilder builder;
- private double[] inputValues;
+ private double[] randomInput;
private int numQueryValues;
- private double[] queryValues;
+ private double[] orderedLittleDoubles;
private boolean useDirect;
private WritableMemory updateSketchMemory;
private WritableMemory compactSketchMemory;
@@ -69,16 +69,16 @@ public class DoublesSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
@Override
public void prepareTrial(final int streamLength) {
// prepare input data
- inputValues = new double[streamLength];
+ randomInput = new double[streamLength];
for (int i = 0; i < streamLength; i++) {
- inputValues[i] = rnd.nextDouble();
+ randomInput[i] = rnd.nextDouble();
}
// prepare query data that must be ordered
- queryValues = new double[numQueryValues];
+ orderedLittleDoubles = new double[numQueryValues];
for (int i = 0; i < numQueryValues; i++) {
- queryValues[i] = rnd.nextDouble();
+ orderedLittleDoubles[i] = rnd.nextDouble();
}
- Arrays.sort(queryValues);
+ Arrays.sort(orderedLittleDoubles);
if (useDirect) {
updateSketchMemory = WritableMemory
.writableWrap(new byte[DoublesSketch.getUpdatableStorageBytes(k,
streamLength)]);
@@ -91,7 +91,7 @@ public class DoublesSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
@SuppressWarnings("unused")
@Override
public void doTrial() {
- DoublesSketchAccuracyProfile.shuffle(inputValues);
+ DoublesSketchAccuracyProfile.shuffle(randomInput);
final long startBuild = System.nanoTime();
final UpdateDoublesSketch updateSketch = useDirect
@@ -101,28 +101,25 @@ public class DoublesSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
buildTimeNs += stopBuild - startBuild;
final long startUpdate = System.nanoTime();
- for (int i = 0; i < inputValues.length; i++) {
- updateSketch.update(inputValues[i]);
+ for (int i = 0; i < randomInput.length; i++) {
+ updateSketch.update(randomInput[i]);
}
final long stopUpdate = System.nanoTime();
updateTimeNs += stopUpdate - startUpdate;
{
final long startGetQuantiles = System.nanoTime();
- updateSketch.getQuantiles(numQueryValues);
+ updateSketch.getQuantiles(orderedLittleDoubles);
final long stopGetQuantiles = System.nanoTime();
updateGetQuantilesTimeNs += stopGetQuantiles - startGetQuantiles;
final long startGetCdf = System.nanoTime();
- updateSketch.getCDF(queryValues);
+ updateSketch.getCDF(orderedLittleDoubles);
final long stopGetCdf = System.nanoTime();
updateGetCdfTimeNs += stopGetCdf - startGetCdf;
final long startGetRank = System.nanoTime();
- for (final double value: queryValues) {
- //updateSketch.getRank(value); //TODO this was not released yet
- final double estRank = updateSketch.getCDF(new double[] {value})[0];
- }
+ final double[] estRanks = updateSketch.getRanks(orderedLittleDoubles);
final long stopGetRank = System.nanoTime();
updateGetRankTimeNs += stopGetRank - startGetRank;
@@ -155,17 +152,17 @@ public class DoublesSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
{
final long startGetQuantiles = System.nanoTime();
- compactSketch.getQuantiles(numQueryValues);
+ compactSketch.getQuantiles(orderedLittleDoubles);
final long stopGetQuantiles = System.nanoTime();
compactGetQuantilesTimeNs += stopGetQuantiles - startGetQuantiles;
final long startGetCdf = System.nanoTime();
- compactSketch.getCDF(queryValues);
+ compactSketch.getCDF(orderedLittleDoubles);
final long stopGetCdf = System.nanoTime();
compactGetCdfTimeNs += stopGetCdf - startGetCdf;
final long startGetRank = System.nanoTime();
- for (final double value: queryValues) {
+ for (final double value: orderedLittleDoubles) {
//compactSketch.getRank(value);
final double estRank = compactSketch.getCDF(new double[] {value})[0];
}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/DruidAppHistStreamAProfile.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/DruidAppHistStreamAProfile.java
deleted file mode 100644
index ed846f3..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/DruidAppHistStreamAProfile.java
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.quantiles;
-
-import static org.apache.datasketches.characterization.ProfileUtil.buildSplitPointsArr;
-
-import java.io.File;
-
-import org.apache.datasketches.Job;
-import org.apache.datasketches.JobProfile;
-import org.apache.datasketches.LineReader;
-import org.apache.datasketches.ProcessLine;
-import org.apache.datasketches.Properties;
-import org.apache.datasketches.UnzipFiles;
-import org.apache.druid.query.aggregation.histogram.ApproximateHistogram;
-import org.apache.druid.query.aggregation.histogram.Histogram;
-//import org.testng.annotations.Test;
-
-/**
- * @author Lee Rhodes
- */
-public class DruidAppHistStreamAProfile implements JobProfile {
-
- private Job job;
-
- //Properties
- private String srcFileName;
- private int reportInterval; //prints number of lines read to console every reportInterval lines.
- private int numRanks; //number of linearly spaced ranks between zero and one.
- private int pplb; //number of split-Points Per Log Base.
- private double logBase; //Log Base
- private int histSize; //ApproximateHistogram size. Max # of position, bin pairs
- private String cdfHdr;
- private String cdfFmt;
- private String pmfHdr;
- private String pmfFmt;
-
- ApproximateHistogram ahist;
- Histogram hist;
-
- private boolean dataWasZipped = false;
- private double eps = 1e-6;
- private Process proc = new Process();
-
- //outputs for plotting
- private double minV;
- private double maxV;
-
- private float[] spArr; //splitpoints 0, 1 ... ceilPwr2(maxValue), ppo values per octave
- private int numSP;
- private double numItems;
-
-
- //JobProfile
- @Override
- public void start(final Job job) {
- this.job = job;
- final Properties prop = job.getProperties();
- //Get Properties
- srcFileName = prop.mustGet("FileName");
- reportInterval = Integer.parseInt(prop.mustGet("ReportInterval"));
- numRanks = Integer.parseInt(prop.mustGet("NumRanks"));
- logBase = Double.parseDouble(prop.mustGet("LogBase"));
- pplb = Integer.parseInt(prop.mustGet("PPLB"));
-
- histSize = Integer.parseInt(prop.mustGet("HistSize"));
-
- cdfHdr = prop.mustGet("CdfHdr").replace("\\t", "\t");
- cdfFmt = prop.mustGet("CdfFmt").replace("\\t", "\t");
- pmfHdr = prop.mustGet("PdfHdr").replace("\\t", "\t");
- pmfFmt = prop.mustGet("PdfFmt").replace("\\t", "\t");
-
- ahist = new ApproximateHistogram(histSize);
-
- processInputStream();
-
- if (dataWasZipped) {
- final File file = new File(srcFileName);
- if (file.exists()) { file.delete(); }
- }
- }
-
- /**
- * Read file, Print CDF, Print PMF.
- */
- private void processInputStream() {
- checkIfZipped(srcFileName);
-
- //Read
- job.println("Input Lines Processed: ");
- final LineReader lineReader = new LineReader(srcFileName);
-
- final long startReadTime_nS = System.nanoTime();
- lineReader.read(0, proc);
- final long readTime_nS = System.nanoTime() - startReadTime_nS;
-
- //print hist stats
- job.println(ahist.toString().replace(", ", "\n").replace("*", ""));
- job.println("Max Storage Size: " + ahist.getMaxStorageSize());
-
- //CDF
- final float[] fracRanks = buildRanksArr(numRanks);
- final long startCdfTime_nS = System.nanoTime();
- final float[] quantiles = ahist.getQuantiles(fracRanks);
- final long cdfTime_nS = System.nanoTime() - startCdfTime_nS;
-
- job.println("");
- job.println("CDF");
- job.println(String.format(cdfHdr, "Index", "Rank", "Quantile"));
- for (int i = 0; i < numRanks; i++) {
- final String s = String.format(cdfFmt, i, fracRanks[i], (int)quantiles[i]);
- job.println(s);
- }
- job.println("");
-
- //print PMF histogram, using Points Per Log Base.
- minV = ahist.getMin();
- maxV = ahist.getMax();
- numItems = ahist.count();
- final double[] splitpoints = buildSplitPointsArr(minV, maxV, pplb, logBase, eps);
- numSP = splitpoints.length;
- spArr = new float[numSP];
- for (int i = 0; i < numSP; i++) { spArr[i] = (float) splitpoints[i]; }
-
- //PMF
- final long startPmfTime_nS = System.nanoTime();
- final Histogram hist = ahist.toHistogram(spArr);
-
- final double[] breaksArr = hist.getBreaks();
- final double[] countsArr = hist.getCounts();
- final long pmfTime_nS = System.nanoTime() - startPmfTime_nS;
- final int lenBreaks = breaksArr.length;
-
- job.println("PMF");
- job.println(String.format(pmfHdr, "Index", "Quantile", "Mass"));
- int i;
- for (i = 0; i < lenBreaks - 1; i++) {
- job. println(String.format(pmfFmt, i, breaksArr[i], countsArr[i]));
- }
- job.println(String.format(pmfFmt, i, breaksArr[i], 0.0)); // the last point
-
- final double readTime_S = readTime_nS / 1E9;
- job.println("");
- job.println(String.format("ReadTime_Sec :\t%10.3f", readTime_S));
- job.println(String.format("ReadRate/Sec :\t%,10.0f", numItems / readTime_S));
- job.println(String.format("CdfTime_mSec :\t%10.3f", cdfTime_nS / 1E6));
- job.println(String.format("Cdf/Point_nSec:\t%10.3f", (double)cdfTime_nS / numRanks));
- job.println(String.format("PmfTime_mSec :\t%10.3f", pmfTime_nS / 1E6));
- job.println(String.format("Pmf/Point_nSec:\t%10.3f", (double)pmfTime_nS / lenBreaks));
- }
-
- /**
- * Compute the ranks array.
- * @param numRanks the number of evenly-spaced rank values excluding 0 and 1.0.
- * @return the ranks array
- */
- private static float[] buildRanksArr(final int numRanks) {
- final float[] fractions = new float[numRanks];
- final double delta = 1.0 / (numRanks + 1);
- for (int i = 1; i <= numRanks; i++) {
- fractions[i - 1] = (float) (delta * i);
- }
- return fractions;
- }
-
- //@Test
- public void checkRanks() {
- final int num = 3;
- final float[] arr = buildRanksArr(num);
- for (int i = 0; i < num; i++) { System.out.println(arr[i]); }
- }
-
- private void checkIfZipped(final String srcFileName) {
- final File file = new File(srcFileName);
- if (!file.exists()) {
- final String srcZipFile = srcFileName + ".zip";
- final File zipFile = new File(srcZipFile);
- if (!zipFile.exists()) {
- throw new IllegalArgumentException("Neither file nor zipFile exists.");
- }
- final String parent = zipFile.getParent();
- job.println("Unzipping data file: " + srcZipFile + "...");
- UnzipFiles.unzip(srcZipFile, parent);
- if (!zipFile.exists()) {
- throw new IllegalArgumentException("Unsuccessful Unzip.");
- }
- job.println(srcZipFile + " unzipped!");
- dataWasZipped = true;
- }
- }
-
- @Override
- public void shutdown() {}
-
- @Override
- public void cleanup() {}
-
- // Callback
- class Process implements ProcessLine {
- int n = 0;
-
- @Override
- public void process(final String strArr0, final int lineNo) {
- if (lineNo % reportInterval == 0) {
- job.println("" + lineNo);
- }
- final long v = Long.parseLong(strArr0);
- ahist.offer(v);
- n++;
- }
- }
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/ItemsSketchSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/ItemsSketchSpeedProfile.java
index a70ebfd..f1b62cb 100644
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/ItemsSketchSpeedProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/quantiles/ItemsSketchSpeedProfile.java
@@ -34,9 +34,10 @@ public class ItemsSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
private static final ArrayOfDoublesSerDe SERDE = new ArrayOfDoublesSerDe();
private static final Random rnd = new Random();
private int k;
- private double[] inputValues;
+ private Double[] randInput;
private int numQueryValues;
- private Double[] queryValues;
+ private Double[] orderedBigDoubles;
+ private double[] orderedLittleDoubles;
long buildTimeNs;
long updateTimeNs;
@@ -57,23 +58,27 @@ public class ItemsSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
@Override
public void prepareTrial(final int streamLength) {
// prepare input data
- inputValues = new double[streamLength];
+ randInput = new Double[streamLength]; //random input doubles [0,1]
for (int i = 0; i < streamLength; i++) {
- inputValues[i] = rnd.nextDouble();
+ randInput[i] = rnd.nextDouble();
}
// prepare query data that must be ordered
- queryValues = new Double[numQueryValues];
- for (int i = 0; i < numQueryValues; i++) {
- queryValues[i] = rnd.nextDouble();
+ orderedLittleDoubles = new double[numQueryValues];
+ orderedBigDoubles = new Double[numQueryValues];
+ for (int i = 0; i < numQueryValues; i++) { //create the little d's
+ orderedLittleDoubles[i] = rnd.nextDouble();
+ }
+ Arrays.sort(orderedLittleDoubles); //sort the little d's
+ for (int i = 0; i < numQueryValues; i++) { //copy to the big D's
+ orderedBigDoubles[i] = orderedLittleDoubles[i];
}
- Arrays.sort(queryValues);
resetStats();
}
@SuppressWarnings("unused")
@Override
public void doTrial() {
- DoublesSketchAccuracyProfile.shuffle(inputValues);
+ shuffle(randInput);
final long startBuild = System.nanoTime();
final ItemsSketch<Double> sketch = ItemsSketch.getInstance(Double.class,
k, COMPARATOR);
@@ -81,29 +86,26 @@ public class ItemsSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
buildTimeNs += stopBuild - startBuild;
final long startUpdate = System.nanoTime();
- for (int i = 0; i < inputValues.length; i++) {
- sketch.update(inputValues[i]);
+ for (int i = 0; i < randInput.length; i++) {
+ sketch.update(randInput[i]);
}
final long stopUpdate = System.nanoTime();
updateTimeNs += stopUpdate - startUpdate;
final long startGetQuantiles = System.nanoTime();
- sketch.getQuantiles(numQueryValues);
+ sketch.getQuantiles(orderedLittleDoubles);
final long stopGetQuantiles = System.nanoTime();
getQuantilesTimeNs += stopGetQuantiles - startGetQuantiles;
final long startGetCdf = System.nanoTime();
- sketch.getCDF(queryValues);
+ sketch.getCDF(orderedBigDoubles);
final long stopGetCdf = System.nanoTime();
getCdfTimeNs += stopGetCdf - startGetCdf;
- final long startGetRank = System.nanoTime();
- for (final double value: queryValues) {
- //sketch.getRank(value); //TODO this was not released yet
- final double estRank = sketch.getCDF(new Double[] {value})[0];
- }
+ final long startGetRanks = System.nanoTime();
+ final double[] estRanks = sketch.getRanks(orderedBigDoubles);
final long stopGetRank = System.nanoTime();
- getRankTimeNs += stopGetRank - startGetRank;
+ getRankTimeNs += stopGetRank - startGetRanks;
final long startSerialize = System.nanoTime();
final byte[] bytes = sketch.toByteArray(SERDE);
@@ -156,4 +158,16 @@ public class ItemsSketchSpeedProfile extends
BaseQuantilesSpeedProfile {
serializedSizeBytes = 0;
}
+ static void shuffle(final Double[] array) {
+ for (int i = 0; i < array.length; i++) {
+ final int r = rnd.nextInt(i + 1);
+ swap(array, i, r);
+ }
+ }
+
+ private static void swap(final Double[] array, final int i1, final int i2) {
+ final Double value = array[i1];
+ array[i1] = array[i2];
+ array[i2] = value;
+ }
}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/MSketchStreamAProfile.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/MSketchStreamAProfile.java
deleted file mode 100644
index d3a33f2..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/MSketchStreamAProfile.java
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.quantiles;
-
-import static java.lang.Math.rint;
-
-import java.io.File;
-
-import org.apache.datasketches.Job;
-import org.apache.datasketches.JobProfile;
-import org.apache.datasketches.LineReader;
-import org.apache.datasketches.ProcessLine;
-import org.apache.datasketches.Properties;
-import org.apache.datasketches.UnzipFiles;
-import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
-
-/**
- * @author Lee Rhodes
- */
-public class MSketchStreamAProfile implements JobProfile {
-
- private Job job;
-
- //Properties
- private String srcFileName;
- private int reportInterval; //prints number of lines read to console every reportInterval lines.
- private int numRanks; //number of linearly spaced ranks between zero and one.
- //private int ppOoM; //number of split-points per Order-Of-Magnitude (OOM).
- private int moments; //number of moments
- private String cdfHdr;
- private String cdfFmt;
- //private String pmfHdr;
- //private String pmfFmt;
-
- MomentSketchWrapper sketch;
-
- private boolean dataWasZipped = false;
- //private double eps = 1e-6;
- private Process proc = new Process();
-
- //outputs for plotting
- private double minV;
- private double maxV;
- private long numItems = 0;
-
- //JobProfile
- @Override
- public void start(final Job job) {
- this.job = job;
- final Properties prop = job.getProperties();
- //Get Properties
- srcFileName = prop.mustGet("FileName");
- reportInterval = Integer.parseInt(prop.mustGet("ReportInterval"));
- numRanks = Integer.parseInt(prop.mustGet("NumRanks"));
- //logBase = Double.parseDouble(prop.mustGet("LogBase"));
- //pplb = Integer.parseInt(prop.mustGet("PPLB"));
-
- moments = Integer.parseInt(prop.mustGet("Moments"));
-
- cdfHdr = prop.mustGet("CdfHdr").replace("\\t", "\t");
- cdfFmt = prop.mustGet("CdfFmt").replace("\\t", "\t");
- //pmfHdr = prop.mustGet("PdfHdr").replace("\\t", "\t");
- //pmfFmt = prop.mustGet("PdfFmt").replace("\\t", "\t");
-
- sketch = new MomentSketchWrapper(moments);
-
- processInputStream();
-
- if (dataWasZipped) {
- final File file = new File(srcFileName);
- if (file.exists()) { file.delete(); }
- }
- }
-
- /**
- * Read file, Print CDF, Print PMF.
- */
- private void processInputStream() {
- checkIfZipped(srcFileName);
-
- //Read
- job.println("");
- job.println("Input Lines Processed: ");
- final LineReader lineReader = new LineReader(srcFileName);
-
- final long startReadTime_nS = System.nanoTime();
- lineReader.read(0, proc);
- final long readTime_nS = System.nanoTime() - startReadTime_nS;
- numItems = proc.n;
- job.println("");
-
- //print sketch stats
- job.println("Sketch.toString()");
- job.println(sketch.toString().replace(", ", "\n"));
- minV = sketch.getMin();
- maxV = sketch.getMax();
- job.println("Min: " + minV);
- job.println("Max: " + maxV);
- job.println("Size: " + sketch.toByteArray().length);
- job.println("NumItems: " + numItems);
- job.println("");
-
- //CDF
- final double[] fracRanks = buildRanksArr(numRanks);
- final long startCdfTime_nS = System.nanoTime();
- final double[] quantiles = sketch.getQuantiles(fracRanks);
- final long cdfTime_nS = System.nanoTime() - startCdfTime_nS;
-
- job.println("CDF");
- job.println(String.format(cdfHdr, "Index", "Rank", "Quantile"));
- for (int i = 0; i < numRanks; i++) {
- final String s = String.format(cdfFmt, i, fracRanks[i], quantiles[i]);
- job.println(s);
- }
- job.println("");
-
-
- final double readTime_S = readTime_nS / 1E9;
-
- job.println(String.format("ReadTime_Sec :\t%10.3f", readTime_S));
- job.println(String.format("ReadRate/Sec :\t%,10.0f", numItems / readTime_S));
- job.println(String.format("CdfTime_mSec :\t%10.3f", cdfTime_nS / 1E6));
- job.println(String.format("Cdf/Point_nSec:\t%10.3f", (double)cdfTime_nS / numRanks));
- //println(String.format("PmfTime_mSec :\t%10.3f", pmfTime_nS / 1E6));
- //println(String.format("Pmf/Point_nSec:\t%10.3f", (double)pmfTime_nS / lenPMF));
- }
-
- /**
- * Compute the ranks array.
- * @param numRanks the number of evenly-spaced rank values including 0 and 1.0.
- * @return the ranks array
- */
- private static double[] buildRanksArr(final int numRanks) {
- final int numRM1 = numRanks - 1;
- final double[] fractions = new double[numRanks];
- final double delta = 1.0 / numRM1;
- double d = 0.0;
- for (int i = 0; i < numRanks; i++) {
- fractions[i] = d;
- d += delta;
- d = rint(d * numRM1) / numRM1;
- }
- return fractions;
- }
-
- private void checkIfZipped(final String srcFileName) {
- final File file = new File(srcFileName);
- if (!file.exists()) {
- final String srcZipFile = srcFileName + ".zip";
- final File zipFile = new File(srcZipFile);
- if (!zipFile.exists()) {
- throw new IllegalArgumentException("Neither file nor zipFile exists.");
- }
- final String parent = zipFile.getParent();
- job.println("Unzipping data file: " + srcZipFile + "...");
- UnzipFiles.unzip(srcZipFile, parent);
- if (!zipFile.exists()) {
- throw new IllegalArgumentException("Unsuccessful Unzip.");
- }
- job.println(srcZipFile + " unzipped!");
- dataWasZipped = true;
- }
- }
-
- @Override
- public void shutdown() {}
-
- @Override
- public void cleanup() {}
-
- // Callback
- class Process implements ProcessLine {
- int n;
-
- @Override
- public void process(final String strArr0, final int lineNo) {
- if (lineNo % reportInterval == 0) {
- job.println("" + lineNo);
- }
- final long v = Long.parseLong(strArr0);
- sketch.add(v);
- n++;
- }
- }
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/DataGenerator.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/DataGenerator.java
deleted file mode 100644
index fc2af33..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/DataGenerator.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.quantiles.tdigest;
-
-import java.util.Random;
-
-public class DataGenerator {
-
- public static enum Mode { Same, Blocky, Uniform, Gaussian }
-
- // starting probability of incrementing a value for Blocky mode
- private static final double PROBABILITY_OF_INCREMENT = 0.001;
- // factor of decrising the probability of incrementing a value for Blocky mode
- private static final double DECREASE_FACTOR = 0.98;
-
- private static final Random rnd = new Random();
-
- private final Mode mode;
-
- public DataGenerator(final Mode mode) {
- this.mode = mode;
- }
-
- public void fillArray(final float[] array) {
- int i = 0;
- int value = 1;
- double p = PROBABILITY_OF_INCREMENT;
- while (i < array.length) {
- if (Mode.Gaussian.equals(mode)) {
- array[i++] = (float) rnd.nextGaussian();
- } else if (Mode.Uniform.equals(mode)) {
- array[i++] = rnd.nextFloat();
- } else {
- array[i++] = value;
- // growing blocks of repeated values
- if (Mode.Blocky.equals(mode) && (rnd.nextDouble() < p)) {
- value++;
- p *= DECREASE_FACTOR; // decrease the probability slightly so that blocks get longer
- }
- }
- }
- }
-
- public void fillArray(final double[] array) {
- int i = 0;
- int value = 1;
- double p = 0.001; // starting probability of incrementing a value for Blocky mode
- while (i < array.length) {
- if (Mode.Gaussian.equals(mode)) {
- array[i++] = rnd.nextGaussian();
- } else if (Mode.Uniform.equals(mode)) {
- array[i++] = rnd.nextDouble();
- } else {
- array[i++] = value;
- // growing blocks of repeated values
- if (Mode.Blocky.equals(mode) && (rnd.nextDouble() < p)) {
- value++;
- p *= .98; // decrease the probability slightly so that blocks get longer
- }
- }
- }
- }
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/DoubleRankCalculator.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/DoubleRankCalculator.java
deleted file mode 100644
index 964963e..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/DoubleRankCalculator.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.quantiles.tdigest;
-
-public class DoubleRankCalculator {
-
- public enum Mode { Min, Mid, Max }
-
- private final double[] values;
- private final Mode mode;
- private int nLess;
- private int nLessOrEq;
-
- // assumes that values are sorted
- public DoubleRankCalculator(final double[] values, final Mode mode) {
- this.values = values;
- this.mode = mode;
- }
-
- public double getRank(final double value) {
- if (Mode.Min.equals(mode) || Mode.Mid.equals(mode)) {
- while ((nLess < values.length) && (values[nLess] < value)) {
- nLess++;
- }
- }
- if (Mode.Max.equals(mode) || Mode.Mid.equals(mode)) {
- while ((nLessOrEq < values.length) && (values[nLessOrEq] <= value)) {
- nLessOrEq++;
- }
- }
- if (Mode.Min.equals(mode)) { return (double) nLess / values.length; }
- if (Mode.Max.equals(mode)) { return (double) nLessOrEq / values.length; }
- return (nLess + nLessOrEq) / 2.0 / values.length;
- }
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesAccuracyProfile.java
deleted file mode 100644
index 9c5fd30..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesAccuracyProfile.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.quantiles.tdigest;
-
-import static org.apache.datasketches.common.Util.pwr2SeriesNext;
-
-import org.apache.datasketches.Job;
-import org.apache.datasketches.JobProfile;
-import org.apache.datasketches.MonotonicPoints;
-import org.apache.datasketches.Properties;
-import org.apache.datasketches.quantiles.DoublesSketch;
-import org.apache.datasketches.quantiles.DoublesSketchBuilder;
-import org.apache.datasketches.quantiles.UpdateDoublesSketch;
-
-public abstract class QuantilesAccuracyProfile implements JobProfile {
-
- Job job;
- private DoublesSketchBuilder builder;
-
- @Override
- public void start(final Job job) {
- this.job = job;
- doTrials();
- }
-
- private void doTrials() {
- final int lgMin = Integer.parseInt(job.getProperties().mustGet("lgMin"));
- final int lgMax = Integer.parseInt(job.getProperties().mustGet("lgMax"));
- final int ppo = Integer.parseInt(job.getProperties().mustGet("PPO"));
- final int numTrials =
Integer.parseInt(job.getProperties().mustGet("trials"));
-
- final int errorSketchLgK =
Integer.parseInt(job.getProperties().mustGet("errLgK"));
- final int errorPct =
Integer.parseInt(job.getProperties().mustGet("errPct"));
-
- builder = DoublesSketch.builder().setK(1 << errorSketchLgK);
-
- configure(job.getProperties());
-
- job.println("StreamLength\tError");
-
- final int numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo);
- int streamLength = 1 << lgMin;
- for (int i = 0; i < numSteps; i++) {
- prepareTrial(streamLength);
- final UpdateDoublesSketch rankErrorSketch = builder.build();
- for (int t = 0; t < numTrials; t++) {
- final double maxRankErrorInTrial = doTrial();
- rankErrorSketch.update(maxRankErrorInTrial);
- }
- job.println(streamLength + "\t"
- + String.format("%.2f", rankErrorSketch.getQuantile((double)
errorPct / 100) * 100));
- streamLength = (int)pwr2SeriesNext(ppo, streamLength);
- }
- }
-
- abstract void configure(Properties props);
-
- abstract void prepareTrial(int streamLength);
-
- abstract double doTrial();
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesSpeedProfile.java
deleted file mode 100644
index e410ea8..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesSpeedProfile.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.quantiles.tdigest;
-
-import static org.apache.datasketches.common.Util.pwr2SeriesNext;
-
-import org.apache.datasketches.Job;
-import org.apache.datasketches.JobProfile;
-import org.apache.datasketches.Properties;
-
-public abstract class QuantilesSpeedProfile implements JobProfile {
-
- private Job job;
-
- @Override
- public void start(final Job job) {
- this.job = job;
- doTrials();
- }
-
- private void doTrials() {
- final int lgMinStreamLen =
Integer.parseInt(job.getProperties().mustGet("lgMin"));
- final int lgMaxStreamLen =
Integer.parseInt(job.getProperties().mustGet("lgMax"));
- final int minStreamLen = 1 << lgMinStreamLen;
- final int maxStreamLen = 1 << lgMaxStreamLen;
- final int pointsPerOctave =
Integer.parseInt(job.getProperties().mustGet("PPO"));
-
- final int lgMaxTrials =
Integer.parseInt(job.getProperties().mustGet("lgMaxTrials"));
- final int lgMinTrials =
Integer.parseInt(job.getProperties().mustGet("lgMinTrials"));
-
- final int k = Integer.parseInt(job.getProperties().mustGet("K"));
- final int numQueryValues =
Integer.parseInt(job.getProperties().mustGet("numQueryValues"));
-
- configure(k, numQueryValues, job.getProperties());
-
- job.println(getHeader());
-
- int streamLength = minStreamLen;
- while (streamLength <= maxStreamLen) {
- prepareTrial(streamLength);
- final int numTrials = getNumTrials(streamLength, lgMinStreamLen,
lgMaxStreamLen,
- lgMinTrials, lgMaxTrials);
- for (int i = 0; i < numTrials; i++) {
- doTrial();
- }
- job.println(getStats(streamLength, numTrials, numQueryValues));
- streamLength = (int)pwr2SeriesNext(pointsPerOctave, streamLength);
- }
- }
-
- abstract void configure(int k, int numQueryValues, Properties properties);
-
- abstract void prepareTrial(int streamLength);
-
- abstract void doTrial();
-
- abstract String getHeader();
-
- abstract String getStats(int streamLength, int numTrials, int
numQueryValues);
-
- private static int getNumTrials(final int x, final int lgMinX, final int
lgMaxX,
- final int lgMinTrials, final int lgMaxTrials) {
- final double slope = (double) (lgMaxTrials - lgMinTrials) / (lgMinX -
lgMaxX);
- final double lgX = Math.log(x) / JobProfile.LN2;
- final double lgTrials = slope * lgX + lgMaxTrials;
- return (int) Math.pow(2, lgTrials);
- }
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/TDigestAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/TDigestAccuracyProfile.java
deleted file mode 100644
index 181d2c9..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/TDigestAccuracyProfile.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.quantiles.tdigest;
-
-import java.util.Arrays;
-
-import org.apache.datasketches.Properties;
-import
org.apache.datasketches.characterization.quantiles.tdigest.DataGenerator.Mode;
-
-import com.tdunning.math.stats.TDigest;
-
-public class TDigestAccuracyProfile extends QuantilesAccuracyProfile {
-
- private int compression;
- private double[] inputValues;
- private DataGenerator gen;
- private DoubleRankCalculator.Mode rankMode;
-
- @Override
- void configure(final Properties props) {
- compression = Integer.parseInt(props.mustGet("compression"));
- gen = new DataGenerator(Mode.valueOf(props.mustGet("data")));
- rankMode = DoubleRankCalculator.Mode.valueOf(props.mustGet("rank"));
- }
-
- @Override
- void prepareTrial(final int streamLength) {
- inputValues = new double[streamLength];
- }
-
- @Override
- double doTrial() {
- gen.fillArray(inputValues);
-
- // build sketch
- final TDigest sketch = TDigest.createDigest(compression);
- for (int i = 0; i < inputValues.length; i++) {
- sketch.add(inputValues[i]);
- }
-
- Arrays.sort(inputValues);
-
- // query sketch and gather results
- double maxRankError = 0;
- final DoubleRankCalculator rank = new DoubleRankCalculator(inputValues,
rankMode);
- for (int i = 0; i < inputValues.length; i++) {
- final double trueRank = rank.getRank(inputValues[i]);
- final double estRank = sketch.cdf(inputValues[i]);
- maxRankError = Math.max(maxRankError, Math.abs(trueRank - estRank));
- }
- return maxRankError;
- }
-
- @Override
- public void shutdown() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void cleanup() {
- // TODO Auto-generated method stub
-
- }
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/TDigestSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/TDigestSpeedProfile.java
deleted file mode 100644
index 821fcd8..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/TDigestSpeedProfile.java
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.characterization.quantiles.tdigest;
-
-import java.nio.ByteBuffer;
-
-import org.apache.datasketches.Properties;
-
-import com.tdunning.math.stats.MergingDigest;
-import com.tdunning.math.stats.TDigest;
-
-public class TDigestSpeedProfile extends QuantilesSpeedProfile {
- //for serialization, more capacity than necessary
- private static final ByteBuffer buf = ByteBuffer.allocate(10000);
- private int k;
- private double[] inputValues;
- private int numQueryValues;
- private double[] queryValues;
- private DataGenerator gen;
-
- long buildTimeNs;
- long updateTimeNs;
- long getQuantileTimeNs;
- long getRankTimeNs;
- long serializeTimeNs;
- long deserializeTimeNs;
- long smallSerializeTimeNs;
- long smallDeserializeTimeNs;
- long numRetainedItems;
- long serializedSizeBytes;
- long smallSerializedSizeBytes;
-
- @Override
- void configure(final int k, final int numQueryValues, final Properties
properties) {
- this.k = k;
- this.numQueryValues = numQueryValues;
- gen = new DataGenerator(DataGenerator.Mode.Uniform);
- }
-
- @Override
- void prepareTrial(final int streamLength) {
- inputValues = new double[streamLength];
- queryValues = new double[numQueryValues];
- resetStats();
- }
-
- @Override
- void doTrial() {
- gen.fillArray(inputValues);
- gen.fillArray(queryValues);
-
- final long startBuild = System.nanoTime();
- final TDigest sketch = TDigest.createDigest(k);
- final long stopBuild = System.nanoTime();
- buildTimeNs += stopBuild - startBuild;
-
- final long startUpdate = System.nanoTime();
- for (int i = 0; i < inputValues.length; i++) {
- sketch.add(inputValues[i]);
- }
- final long stopUpdate = System.nanoTime();
- updateTimeNs += stopUpdate - startUpdate;
-
- final long startGetQuantile = System.nanoTime();
- for (final double value: queryValues) {
- sketch.quantile(value);
- }
- final long stopGetQuantile = System.nanoTime();
- getQuantileTimeNs += stopGetQuantile - startGetQuantile;
-
- final long startGetRank = System.nanoTime();
- for (final double value: queryValues) {
- sketch.cdf(value);
- }
- final long stopGetRank = System.nanoTime();
- getRankTimeNs += stopGetRank - startGetRank;
-
- buf.rewind();
- final long startSerialize = System.nanoTime();
- sketch.asBytes(buf);
- final long stopSerialize = System.nanoTime();
- serializeTimeNs += stopSerialize - startSerialize;
- buf.rewind();
- final long startDeserialize = System.nanoTime();
- MergingDigest.fromBytes(buf);
- final long stopDeserialize = System.nanoTime();
- deserializeTimeNs += stopDeserialize - startDeserialize;
- buf.rewind();
- final long startSmallSerialize = System.nanoTime();
- sketch.asSmallBytes(buf);
- final long stopSmallSerialize = System.nanoTime();
- smallSerializeTimeNs += stopSmallSerialize - startSmallSerialize;
- buf.rewind();
- final long startSmallDeserialize = System.nanoTime();
- MergingDigest.fromBytes(buf);
- final long stopSmallDeserialize = System.nanoTime();
- smallDeserializeTimeNs += stopSmallDeserialize - startSmallDeserialize;
-
- // could record the last one since they must be the same
- // but let's average across all trials to see if there is an anomaly
- numRetainedItems += sketch.centroidCount();
- serializedSizeBytes += sketch.byteSize();
- smallSerializedSizeBytes += sketch.smallByteSize();
- }
-
- @Override
- String getHeader() {
- return
"Stream\tTrials\tBuild\tUpdate\tQuant\tCDF\tSer\tDe\tSer\tDe\tItems\tstatsSize\tSmall";
- }
-
- @Override
- String getStats(final int streamLength, final int numTrials, final int
numQueryValues) {
- return
(String.format("%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%d\t%d\t%d",
- streamLength,
- numTrials,
- (double) buildTimeNs / numTrials,
- (double) updateTimeNs / numTrials / streamLength,
- (double) getQuantileTimeNs / numTrials / numQueryValues,
- (double) getRankTimeNs / numTrials / numQueryValues,
- (double) serializeTimeNs / numTrials,
- (double) deserializeTimeNs / numTrials,
- (double) smallSerializeTimeNs / numTrials,
- (double) smallDeserializeTimeNs / numTrials,
- numRetainedItems / numTrials,
- serializedSizeBytes / numTrials,
- smallSerializedSizeBytes / numTrials
- ));
- }
-
- private void resetStats() {
- buildTimeNs = 0;
- updateTimeNs = 0;
- getQuantileTimeNs = 0;
- getRankTimeNs = 0;
- numRetainedItems = 0;
- serializeTimeNs = 0;
- deserializeTimeNs = 0;
- smallSerializeTimeNs = 0;
- smallDeserializeTimeNs = 0;
- serializedSizeBytes = 0;
- smallSerializedSizeBytes = 0;
- }
-
- @Override
- public void shutdown() { }
-
- @Override
- public void cleanup() { }
-
-}
diff --git
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/package-info.java
b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/package-info.java
deleted file mode 100644
index 4b85ac5..0000000
---
a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/package-info.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/**
- * @author Lee Rhodes
- */
-
-package org.apache.datasketches.characterization.quantiles.tdigest;
diff --git
a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java
index fc13064..3cb7daf 100644
---
a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java
@@ -22,8 +22,8 @@ package org.apache.datasketches.characterization.req;
import static java.lang.Math.round;
import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD;
import static org.apache.datasketches.SpacedPoints.expSpaced;
-import static org.apache.datasketches.Util.evenlySpaced;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+import static
org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles;
import org.apache.datasketches.Job;
import org.apache.datasketches.JobProfile;
@@ -256,7 +256,7 @@ public class ReqSketchAccuracyProfile implements JobProfile
{
//generates PP indices in [startIdx, endIdx] inclusive, inclusive
// PV 2020-01-07: using double so that there's enough precision even for
large stream lengths
final double[] temp = evenlySpaced
- ? evenlySpaced(startIdx, endIdx, numPlotPoints)
+ ? evenlySpacedDoubles(startIdx, endIdx, numPlotPoints)
: expSpaced(startIdx, endIdx, numPlotPoints, exponent, hra);
sortedPPIndices = new int[numPlotPoints];
diff --git
a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile2.java
b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile2.java
index 16ed7c9..dd36711 100644
---
a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile2.java
+++
b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile2.java
@@ -20,7 +20,7 @@
package org.apache.datasketches.characterization.req;
import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD;
-import static org.apache.datasketches.Util.evenlySpacedFloats;
+import static
org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedFloats;
import org.apache.datasketches.Job;
import org.apache.datasketches.JobProfile;
diff --git
a/src/main/java/org/apache/datasketches/characterization/req/TrueFloatRanks.java
b/src/main/java/org/apache/datasketches/characterization/req/TrueFloatRanks.java
index 7c862f3..c45feb8 100644
---
a/src/main/java/org/apache/datasketches/characterization/req/TrueFloatRanks.java
+++
b/src/main/java/org/apache/datasketches/characterization/req/TrueFloatRanks.java
@@ -21,8 +21,8 @@ package org.apache.datasketches.characterization.req;
import java.util.Arrays;
-import org.apache.datasketches.BinarySearch;
import org.apache.datasketches.characterization.Shuffle;
+import org.apache.datasketches.quantilescommon.BinarySearch;
import org.testng.annotations.Test;
/**
diff --git
a/src/main/java/org/apache/datasketches/characterization/theta/ThetaSerDeProfile.java
b/src/main/java/org/apache/datasketches/characterization/theta/ThetaSerDeProfile.java
index 0d9e84d..5286744 100644
---
a/src/main/java/org/apache/datasketches/characterization/theta/ThetaSerDeProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/theta/ThetaSerDeProfile.java
@@ -20,9 +20,6 @@
package org.apache.datasketches.characterization.theta;
import org.apache.datasketches.characterization.uniquecount.BaseSerDeProfile;
-
-//import static org.testng.Assert.assertEquals;
-
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.ResizeFactor;
import org.apache.datasketches.memory.Memory;
diff --git
a/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaAccuracyProfile.java
index 9c1bd1c..c1f8e77 100644
---
a/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaAccuracyProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaAccuracyProfile.java
@@ -19,7 +19,7 @@
package org.apache.datasketches.characterization.theta.concurrent;
-import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static
org.apache.datasketches.thetacommon.ThetaUtil.DEFAULT_UPDATE_SEED;
import org.apache.datasketches.characterization.AccuracyStats;
import
org.apache.datasketches.characterization.uniquecount.BaseAccuracyProfile;
diff --git
a/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaMultithreadedSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaMultithreadedSpeedProfile.java
index 29c1762..4218edc 100644
---
a/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaMultithreadedSpeedProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaMultithreadedSpeedProfile.java
@@ -19,7 +19,7 @@
package org.apache.datasketches.characterization.theta.concurrent;
-import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static
org.apache.datasketches.thetacommon.ThetaUtil.DEFAULT_UPDATE_SEED;
import java.util.concurrent.locks.ReentrantReadWriteLock;
diff --git
a/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaUpdateSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaUpdateSpeedProfile.java
index 16ee1fa..b341f5f 100644
---
a/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaUpdateSpeedProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/theta/concurrent/ConcurrentThetaUpdateSpeedProfile.java
@@ -19,7 +19,7 @@
package org.apache.datasketches.characterization.theta.concurrent;
-import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static
org.apache.datasketches.thetacommon.ThetaUtil.DEFAULT_UPDATE_SEED;
import
org.apache.datasketches.characterization.uniquecount.BaseUpdateSpeedProfile;
import org.apache.datasketches.memory.WritableHandle;
diff --git
a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseAccuracyProfile.java
index 38a5922..24db995 100644
---
a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseAccuracyProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseAccuracyProfile.java
@@ -20,7 +20,7 @@
package org.apache.datasketches.characterization.uniquecount;
import static org.apache.datasketches.GaussianRanks.GAUSSIANS_4SD;
-import static org.apache.datasketches.Util.milliSecToString;
+import static org.apache.datasketches.common.Util.milliSecToString;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
import org.apache.datasketches.Job;
diff --git
a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseBoundsAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseBoundsAccuracyProfile.java
index 31278f9..d975728 100644
---
a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseBoundsAccuracyProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseBoundsAccuracyProfile.java
@@ -20,7 +20,7 @@
package org.apache.datasketches.characterization.uniquecount;
import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD;
-import static org.apache.datasketches.Util.milliSecToString;
+import static org.apache.datasketches.common.Util.milliSecToString;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
import java.io.PrintWriter;
diff --git a/src/main/resources/hll/druid/DruidHllAccuracyJob.conf
b/src/main/resources/hll/druid/DruidHllAccuracyJob.conf
deleted file mode 100644
index 830c276..0000000
--- a/src/main/resources/hll/druid/DruidHllAccuracyJob.conf
+++ /dev/null
@@ -1,44 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Job
-
-#Uniques Profile
-Trials_lgMinU=0 #The starting # of uniques that is printed at the end.
-Trials_lgMaxU=25 #How high the # uniques go
-Trials_UPPO=32 #The horizontal x-resolution of trials points
-Trials_string=false
-
-# Trials Profile
-Trials_lgMinT=8 #prints intermediate results starting w/ this lgMinT
-Trials_lgMaxT=12 #The max trials
-Trials_TPPO=1 #how often intermediate results are printed
-
-Trials_lgQK=13 #size of quantiles sketch
-Trials_interData=true
-Trials_postPMFs=false
-Trials_bytes=false
-
-# Date-Time Profile
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.hll.druidhll.DruidHllAccuracyProfile
-LgK=0 # not used by Druid HLL
diff --git a/src/main/resources/hll/druid/DruidHllMergeAccuracyJob.conf
b/src/main/resources/hll/druid/DruidHllMergeAccuracyJob.conf
deleted file mode 100644
index f005188..0000000
--- a/src/main/resources/hll/druid/DruidHllMergeAccuracyJob.conf
+++ /dev/null
@@ -1,22 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-JobProfile=org.apache.datasketches.characterization.hll.DruidHll.MergeAccuracyProfile
-
-numTrials=100
-numSketches=8192
-distinctKeysPerSketch=32768
diff --git a/src/main/resources/hll/zeta/ZetaHllAccuracyJob.conf
b/src/main/resources/hll/zeta/ZetaHllAccuracyJob.conf
deleted file mode 100644
index b7b2089..0000000
--- a/src/main/resources/hll/zeta/ZetaHllAccuracyJob.conf
+++ /dev/null
@@ -1,46 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Job
-
-#Uniques Profile
-Trials_lgMinU=0 #The starting # of uniques that is printed at the end.
-Trials_lgMaxU=13 #How high the # uniques go
-Trials_UPPO=16 #The horizontal x-resolution of trials points
-Trials_string=false #Used in Druid HLL profile
-
-# Trials Profile
-Trials_lgMinT=8 #prints intermediate results starting w/ this lgMinT
-Trials_lgMaxT=20 #The max trials
-Trials_TPPO=1 #how often intermediate results are printed
-
-Trials_lgQK=13 #size of quantiles sketch for analysis
-Trials_interData=true
-Trials_postPMFs=false
-Trials_bytes=false
-
-# Date-Time Profile
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.hll.ZetaHllAccuracyProfile
-LgK=12 #Normal precision, Min 10, Max 24
-LgSP=12 #Sparse precision= Normal Prcision + 0 to + 5, Max 25
-ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
diff --git a/src/main/resources/hll/zeta/ZetaHllMergeAccuracyJob.conf
b/src/main/resources/hll/zeta/ZetaHllMergeAccuracyJob.conf
deleted file mode 100644
index 30f1ff8..0000000
--- a/src/main/resources/hll/zeta/ZetaHllMergeAccuracyJob.conf
+++ /dev/null
@@ -1,37 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#Job
-
-# Date-Time Profile
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-
-# Trials Profile
-numTrials=100
-numSketches=8192
-distinctKeysPerSketch=32768
-
-# Job Profile
-JobProfile=org.apache.datasketches.characterization.hll.ZetaHllMergeAccuracyProfile
-ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
-lgK=11
-
-
-
diff --git a/src/main/resources/hll/zeta/ZetaHllMergeSpeedJob.conf
b/src/main/resources/hll/zeta/ZetaHllMergeSpeedJob.conf
deleted file mode 100644
index fc76c54..0000000
--- a/src/main/resources/hll/zeta/ZetaHllMergeSpeedJob.conf
+++ /dev/null
@@ -1,39 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Job
-
-# Date-Time Profile
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-
-#X-axis LgK Profile
-MinLgK=10
-MaxLgK=21
-
-# Trials Profile
-MinLgT=6 #Min Log Trials
-MaxLgT=6 #Max Log Trials
-
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.hll.ZetaHllMergeSpeedProfile
-ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
-SerDe=false
-LgDeltaU=2
-
diff --git a/src/main/resources/hll/zeta/ZetaHllSerDeJob.conf
b/src/main/resources/hll/zeta/ZetaHllSerDeJob.conf
deleted file mode 100644
index 1d36642..0000000
--- a/src/main/resources/hll/zeta/ZetaHllSerDeJob.conf
+++ /dev/null
@@ -1,43 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Job
-
-#Uniques Profile
-Trials_lgMinU=0 #The starting # of uniques that is printed at the end.
-Trials_lgMaxU=24 #How high the # uniques go
-Trials_UPPO=2 #The horizontal x-resolution of trial points, Points Per Octave
-
-# Trials Profile
-Trials_lgMaxT=20 #Max trials at start (low counts)
-Trials_lgMinT=7 #Min trials at tail (high counts)
-
-#Trials Speed related
-Trials_lgMinBpU=4 #start the downward slope of trials at this LgU
-Trials_lgMaxBpU=20 #stop the downward slope of trials at this LgU
-
-# Date-Time Profile
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.hll.ZetaHllSerDeProfile
-LgK=20 #Normal precision, Min 10, Max 24
-LgSP=25 #Sparse precision= Normal Precision + 0 to 5, Max 25
-ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
\ No newline at end of file
diff --git a/src/main/resources/hll/zeta/ZetaHllSpeedJob.conf
b/src/main/resources/hll/zeta/ZetaHllSpeedJob.conf
deleted file mode 100644
index 9b32341..0000000
--- a/src/main/resources/hll/zeta/ZetaHllSpeedJob.conf
+++ /dev/null
@@ -1,43 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Job
-
-#Uniques Profile
-Trials_lgMinU=0 #The starting # of uniques that is printed at the end.
-Trials_lgMaxU=23 #How high the # uniques go
-Trials_UPPO=16 #The horizontal x-resolution of trials points
-
-# Trials Profile
-Trials_lgMaxT=24 #Max trials at start (low counts)
-Trials_lgMinT=4 #Min trials at tail (high counts)
-
-#Trials Speed related
-Trials_lgMinBpU=4 #start the downward slope of trials at this LgU
-Trials_lgMaxBpU=20 #stop the downward slope of trials at this LgU
-
-# Date-Time Profile
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.hll.ZetaHllUpdateSpeedProfile
-LgK=12 #Normal precision, Min 10, Max 24
-LgSP=17 #Sparse precision= Normal Precision + 0 to 5, Max 25
-ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
diff --git a/src/main/resources/hll/zeta/ZetaHllUnionUpdateSpeedJob.conf
b/src/main/resources/hll/zeta/ZetaHllUnionUpdateSpeedJob.conf
deleted file mode 100644
index 4687669..0000000
--- a/src/main/resources/hll/zeta/ZetaHllUnionUpdateSpeedJob.conf
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Job
-
-#Uniques Profile
-Trials_lgMinU=0 #The starting # of uniques that is printed at the end.
-Trials_lgMaxU=26 #How high the # uniques go
-Trials_UPPO=16 #The horizontal x-resolution of trials points
-
-# Trials Profile
-Trials_lgMaxT=14 #Max trials at start (low counts)
-Trials_lgMinT=4 #Min trials at tail (high counts)
-
-#Trials Speed related
-Trials_lgMinBpU=0 #start the downward slope of trials at this LgU
-Trials_lgMaxBpU=26 #stop the downward slope of trials at this LgU
-
-# Date-Time Profile
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.hll.ZetaHllUnionUpdateSpeedProfile
-LgK=12
-NumSketches=32
\ No newline at end of file
diff --git a/src/main/resources/quantiles/DruidAHStreamAJob.conf
b/src/main/resources/quantiles/DruidAHStreamAJob.conf
deleted file mode 100644
index ad6e428..0000000
--- a/src/main/resources/quantiles/DruidAHStreamAJob.conf
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.quantiles.DruidAppHistStreamAProfile
-
-FileName=/Users/lrhodes/dev/git/characterization/streamA.txt
-ReportInterval=10000000
-NumRanks=99
-LogBase=10.0
-PPLB=5
-
-HistSize=105
-
-CdfHdr=%6s\t%16s\t%16s
-CdfFmt=%6d\t%16.2f\t%16d
-PdfHdr=%6s\t%16s\t%16s
-PdfFmt=%6d\t%16.6f\t%16.0f
-
-# Date-Time Profile: Read by Job.java
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
diff --git a/src/main/resources/quantiles/MSketchStreamAJob.conf
b/src/main/resources/quantiles/MSketchStreamAJob.conf
deleted file mode 100644
index 5c18570..0000000
--- a/src/main/resources/quantiles/MSketchStreamAJob.conf
+++ /dev/null
@@ -1,39 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.quantiles.MSketchStreamAProfile
-
-FileName=/Users/lrhodes/dev/git/characterization/streamA.txt
-ReportInterval=10000000
-NumRanks=101
-LogBase=10
-PPLB=5
-
-Moments=15
-
-CdfHdr=%6s\t%16s\t%16s
-CdfFmt=%6d\t%16.2f\t%16.0f
-PdfHdr=%6s\t%16s\t%16s
-PdfFmt=%6d\t%16.6f\t%16.0f
-
-
-# Date-Time Profile: Read by Job.java
-TimeZone=PST
-TimeZoneOffset=-28800000 # offset in millisec
-FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
diff --git a/src/main/resources/quantiles/TDigestAccuracyJob.conf
b/src/main/resources/quantiles/TDigestAccuracyJob.conf
deleted file mode 100644
index c7e8f6c..0000000
--- a/src/main/resources/quantiles/TDigestAccuracyJob.conf
+++ /dev/null
@@ -1,27 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-JobProfile=org.apache.datasketches.characterization.quantiles.tdigest.TDigestAccuracyProfile
-compression=200 # sketch size and accuracy parameter
-errLgK=10 # parameter for the rank error distribution sketch
-errPct=99 # percentile of the rank error distribution to report
-trials=20 # Trials at every stream length
-lgMin=0 # The starting stream length
-lgMax=23 # How high the stream length goes
-PPO=16 # The horizontal x-resolution of trials points
-data=Gaussian # distribution of input data
-rank=Max # rank rule
diff --git a/src/main/resources/quantiles/TDigestSpeedJob.conf
b/src/main/resources/quantiles/TDigestSpeedJob.conf
deleted file mode 100644
index 74389c1..0000000
--- a/src/main/resources/quantiles/TDigestSpeedJob.conf
+++ /dev/null
@@ -1,27 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-JobProfile=org.apache.datasketches.characterization.quantiles.tdigest.TDigestSpeedProfile
-K=200 # sketch size and accuracy parameter
-numQueryValues=20 # number of values for getQuantiles(), getCDF() and getRank()
-
-lgMin=0 # The starting stream length
-lgMax=23 # How high the stream length goes
-PPO=16 # The horizontal x-resolution of trials points
-
-lgMaxTrials=16 # Max trials at start (low counts)
-lgMinTrials=9 # Min trials at tail (high counts)
diff --git a/src/test/java/org/apache/datasketches/MonotonicPointsTest.java
b/src/test/java/org/apache/datasketches/MonotonicPointsTest.java
index b0aac80..255b0ec 100644
--- a/src/test/java/org/apache/datasketches/MonotonicPointsTest.java
+++ b/src/test/java/org/apache/datasketches/MonotonicPointsTest.java
@@ -23,6 +23,7 @@ import static
org.apache.datasketches.MonotonicPoints.countLog10Points;
import static org.apache.datasketches.MonotonicPoints.countPoints;
import static org.apache.datasketches.common.Util.powerSeriesNextDouble;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles;
import static org.testng.Assert.assertEquals;
import org.testng.annotations.Test;
@@ -53,7 +54,7 @@ public class MonotonicPointsTest {
@Test
public void checkEvenlySpacedPoints() {
- double[] arr = Util.evenlySpaced(0.0, 100.0, 21);
+ double[] arr = evenlySpacedDoubles(0.0, 100.0, 21);
for (int i = 0; i < arr.length; i++) { println(arr[i] + ""); }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]