AlexanderSaydakov commented on code in PR #475:
URL: https://github.com/apache/datasketches-java/pull/475#discussion_r1408279367


##########
pom.xml:
##########
@@ -150,6 +150,13 @@ under the License.
       <version>${testng.version}</version>
       <scope>test</scope>
     </dependency>
+    <!--
+    <dependency>
+      <groupId>org.apache.datasketches</groupId>
+      <artifactId>datasketches-java-common</artifactId>
+      <version>1.0.0</version>
+    </dependency>
+    -->

Review Comment:
   is this commented section needed?



##########
src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java:
##########
@@ -39,56 +41,91 @@ public final class KllDoublesSketchSortedView implements 
DoublesSortedView {
   private final double[] quantiles;
   private final long[] cumWeights; //comes in as individual weights, converted 
to cumulative natural weights
   private final long totalN;
+  private final double[] normRanks;
+  private final double maxItem;
+  private final double minItem;
 
   /**
    * Construct from elements for testing.
    * @param quantiles sorted array of quantiles
    * @param cumWeights sorted, monotonically increasing cumulative weights.
    * @param totalN the total number of items presented to the sketch.
    */
-  KllDoublesSketchSortedView(final double[] quantiles, final long[] 
cumWeights, final long totalN) {
+  KllDoublesSketchSortedView(final double[] quantiles, final long[] 
cumWeights, final long totalN,
+      final double maxItem, final double minItem) {
     this.quantiles = quantiles;
     this.cumWeights  = cumWeights;
     this.totalN = totalN;
+    this.maxItem = maxItem;
+    this.minItem = minItem;
+    final int len = cumWeights.length;
+    final double[] normRanks = new double[len];
+    for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / 
totalN; }
+    this.normRanks = normRanks;
   }
 
   /**
    * Constructs this Sorted View given the sketch
-   * @param sk the given KllDoublesSketch.
+   * @param sketch the given KllDoublesSketch.
    */
-  public KllDoublesSketchSortedView(final KllDoublesSketch sk) {
-    this.totalN = sk.getN();
-    final double[] srcQuantiles = sk.getDoubleItemsArray();
-    final int[] srcLevels = sk.levelsArr;
-    final int srcNumLevels = sk.getNumLevels();
+  public KllDoublesSketchSortedView(final KllDoublesSketch sketch) {
+    if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+    this.totalN = sketch.getN();
+    this.maxItem = sketch.getMaxItem();
+    this.minItem = sketch.getMinItem();
+    final double[] srcQuantiles = sketch.getDoubleItemsArray();
+    final int[] srcLevels = sketch.levelsArr;
+    final int srcNumLevels = sketch.getNumLevels();
 
-    if (!sk.isLevelZeroSorted()) {
+    if (!sketch.isLevelZeroSorted()) {

Review Comment:
   Seems too invasive. I would suggest asking the sketch to sort its own level 
zero



##########
src/main/java/org/apache/datasketches/partitions/Partitioner.java:
##########
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.partitions;
+
+import static java.lang.Math.ceil;
+import static java.lang.Math.log;
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static java.lang.Math.pow;
+import static java.lang.Math.round;
+import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
+import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries;
+import org.apache.datasketches.quantilescommon.PartitioningFeature;
+import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
+import org.apache.datasketches.quantilescommon.QuantilesGenericAPI;
+import org.apache.datasketches.quantilescommon.Stack;
+
+/**
+ * A partitioning process that can partition very large data sets into 
thousands
+ * of partitions of approximately the same size.
+ * @param <T> the data type
+ * @param <S> the quantiles sketch that implements both QuantilesGenericAPI 
and PartitioningFeature.
+ */
+//@SuppressWarnings("unused")

Review Comment:
   do we need to keep this?



##########
src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java:
##########
@@ -132,18 +204,36 @@ public double[] getPMF(final T[] splitPoints, final 
QuantileSearchCriteria searc
   public T getQuantile(final double rank, final QuantileSearchCriteria 
searchCrit) {
     if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
     QuantilesUtil.checkNormalizedRankBounds(rank);
+    final int index = getQuantileIndex(rank, searchCrit);
+    return getQuantileFromIndex(index);
+  }
+
+  private T getQuantileFromIndex(final int index) { return quantiles[index]; }

Review Comment:
   having a method just to get an item from an internal array by index for 
internal use seems like unnecessary complexity



##########
src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java:
##########
@@ -116,10 +132,66 @@ public long[] getCumulativeWeights() {
     return cumWeights.clone();
   }
 
-  @Override //implemented here because it needs the comparator
+  @Override
+  public T getMaxItem() {
+    return maxItem;
+  }
+
+  @Override
+  public T getMinItem() {
+    return minItem;
+  }
+
+  @Override
+  public long getN() {
+    return totalN;
+  }
+
+  @Override
+  public double[] getNormalizedRanks() {
+    return normRanks.clone();
+  }
+
+  @Override
+  @SuppressWarnings("unchecked")
+  public GenericPartitionBoundaries<T> getPartitionBoundaries(final int 
numEquallySized,
+      final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new 
IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
+    final long totalN = this.totalN;
+    final int svLen = cumWeights.length;
+    //adjust ends of sortedView arrays
+    cumWeights[0] = 1L;
+    cumWeights[svLen - 1] = totalN;
+    normRanks[0] = 1.0 / totalN;
+    normRanks[svLen - 1] = 1.0;
+    quantiles[0] = this.getMinItem();
+    quantiles[svLen - 1] = this.getMaxItem();
+
+    final double[] evSpNormRanks = evenlySpacedDoubles(0, 1.0, numEquallySized 
+ 1);
+    final int len = evSpNormRanks.length;
+    final T[] evSpQuantiles = (T[]) Array.newInstance(clazz, len);
+
+    final long[] evSpNatRanks = new long[len];
+    for (int i = 0; i < len; i++) {
+      final int index = getQuantileIndex(evSpNormRanks[i], searchCrit);
+      evSpQuantiles[i] = getQuantileFromIndex(index);
+      evSpNatRanks[i] = getCumWeightFromIndex(index);
+    }
+    final GenericPartitionBoundaries<T> gpb = new GenericPartitionBoundaries<>(
+        this.totalN,
+        evSpQuantiles.clone(),

Review Comment:
   does the partitioner modify these arrays? do we really have to clone?



##########
src/main/java/org/apache/datasketches/quantilescommon/Stack.java:
##########
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.quantilescommon;
+
+import java.util.ArrayList;
+
+import org.apache.datasketches.common.SketchesStateException;
+
+/**
+ * A classic LIFO stack based on ArrayList (as opposed to Vector).
+ * All of the methods of ArrayList are available.
+ */
+public class Stack<E> extends ArrayList<E> {

Review Comment:
   Why is this needed? Would java.util.Deque do the business?



##########
src/main/java/org/apache/datasketches/common/Util.java:
##########
@@ -550,56 +531,60 @@ public static double powerSeriesNextDouble(final int ppb, 
final double curPoint,
   }
 
   /**
-   * Computes the ceiling power of given <i>base</i> and <i>n</i> as doubles.
-   * This is the smallest positive power
-   * of <i>base</i> that equal to or greater than the given <i>n</i> and equal 
to a mathematical integer.
+   * Returns the ceiling of a given <i>n</i> given a <i>radix</i>, where the 
ceiling is an integral power of the radix.
+   * This is the smallest positive power of <i>radix</i> that is equal to or 
greater than the given <i>n</i>
+   * and equal to a mathematical integer.
    * The result of this function is consistent with {@link 
#ceilingIntPowerOf2(int)} for values
    * less than one. I.e., if <i>n &lt; 1,</i> the result is 1.
    *
-   * @param base The base in the expression &#8968;base<sup>n</sup>&#8969;.
+   * <p>The formula is: 
<i>radix<sup>ceiling(log<sub>radix</sub>(x))</sup></i></p>
+   *
+   * @param radix The base of the number system.
    * @param n The input argument.
-   * @return the ceiling power of <i>base</i> as a double and equal to a 
mathematical integer.
+   * @return the ceiling power of <i>radix</i> as a double and equal to a 
mathematical integer.
    */
-  public static double ceilingPowerBaseOfDouble(final double base, final 
double n) {
+  public static double ceilingPowerBaseOfDouble(final double radix, final 
double n) {
     final double x = n < 1.0 ? 1.0 : n;
-    return pow(base, ceil(logBaseOfX(base, x)));
+    return Math.round(pow(radix, ceil(logBaseOfX(radix, x))));
   }
 
   /**
-   * Computes the floor power of given <i>base</i> and <i>n</i> as doubles.
-   * This is the largest positive power
-   * of <i>base</i> that equal to or less than the given n and equal to a 
mathematical integer.
+   * Computes the floor of a given <i>n</i> given <i>radix</i>, where the 
floor is an integral power of the radix.
+   * This is the largest positive power of <i>radix</i> that is equal to or 
less than the given <i>n</i>
+   * and equal to a mathematical integer.
    * The result of this function is consistent with {@link 
#floorPowerOf2(int)} for values
    * less than one. I.e., if <i>n &lt; 1,</i> the result is 1.
    *
-   * @param base The base in the expression &#8970;base<sup>n</sup>&#8971;.
+   * <p>The formula is: 
<i>radix<sup>floor(log<sub>radix</sub>(x))</sup></i></p>
+   *
+   * @param radix The base of the number system.
    * @param n The input argument.
    * @return the floor power of 2 and equal to a mathematical integer.
    */
-  public static double floorPowerBaseOfDouble(final double base, final double 
n) {
+  public static double floorPowerBaseOfDouble(final double radix, final double 
n) {
     final double x = n < 1.0 ? 1.0 : n;
-    return pow(base, floor(logBaseOfX(base, x)));
+    return Math.round(pow(radix, floor(logBaseOfX(radix, x))));
   }
 
   // Logarithm related
 
   /**
-   * The log base 2 of the value
+   * The log<sub>2</sub>(value)
    * @param value the given value
-   * @return The log base 2 of the value
+   * @return log<sub>2</sub>(value)
    */
   public static double log2(final double value) {
     return log(value) / LOG2;
   }
 
   /**
-   * Returns the logarithm_logBase of x. Example: logB(2.0, x) = log(x) / 
log(2.0).
-   * @param logBase the base of the logarithm used
+   * Returns the log<sub>radix</sub>(x). Example: logB(2.0, x) = log(x) / 
log(2.0).

Review Comment:
   "logarithm of X to the base B" is a standard terminology. why radix?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to