github-advanced-security[bot] commented on code in PR #475: URL: https://github.com/apache/datasketches-java/pull/475#discussion_r1396423372
########## src/main/java/org/apache/datasketches/partitions/Partitioner.java: ########## @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static java.lang.Math.ceil; +import static java.lang.Math.log; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static java.lang.Math.pow; +import static java.lang.Math.round; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; +import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; +import org.apache.datasketches.quantilescommon.Stack; + +/** + * A partitioning process that can partition very large data sets into thousands to millions + * of partitions of approximately the same size. + * @param T the data type + * @param S the quantiles sketch that implements both QuantilesGenericAPI and PartitioningFeature. + */ +//@SuppressWarnings("unused") +public class Partitioner<T, S extends QuantilesGenericAPI<T> & PartitioningFeature<T>> { + private static final QuantileSearchCriteria defaultCriteria = INCLUSIVE; + private final long tgtPartitionSize; + private final int maxPartsPerSk; + private final SketchFillRequest<T, S> fillReq; + private final QuantileSearchCriteria criteria; + private final Stack<StackElement<T>> stack = new Stack<>(); + + //computed once at the beginning + private int numLevels; + private int partitionsPerSk; + //output + private final List<PartitionBoundsRow<T>> finalPartitionList = new ArrayList<>(); + + /** + * This constructor assumes a QuantileSearchCriteria of INCLUSIVE. + * @param tgtPartitionSize the target size of the resulting partitions in number of items. + * @param maxPartsPerPass The maximum number of partitions to request from the sketch. The smaller this number is + * the smaller the variance will be of the resulting partitions, but this will increase the number of passes of the + * source data set. + * @param fillReq The is an implementation of the SketchFillRequest call-back supplied by the user and implements + * the SketchFillRequest interface. + */ + public Partitioner( + final long tgtPartitionSize, + final int maxPartsPerPass, + final SketchFillRequest<T,S> fillReq) { + this(tgtPartitionSize, maxPartsPerPass, fillReq, defaultCriteria); + } + + /** + * This constructor includes the QuantileSearchCriteria criteria as a parameter. + * @param tgtPartitionSize the target size of the resulting partitions in number of items. + * @param maxPartsPerSk The maximum number of partitions to request from the sketch. The smaller this number is + * the smaller the variance will be of the resulting partitions, but this will increase the number of passes of the + * source data set. + * @param fillReq The is an implementation of the SketchFillRequest call-back supplied by the user. + * @param criteria This is the desired QuantileSearchCriteria to be used. + */ + public Partitioner( + final long tgtPartitionSize, + final int maxPartsPerSk, + final SketchFillRequest<T,S> fillReq, + final QuantileSearchCriteria criteria) { + this.tgtPartitionSize = tgtPartitionSize; + this.maxPartsPerSk = maxPartsPerSk; + this.fillReq = fillReq; + this.criteria = criteria; + } + + /** + * This initiates the partitioning process + * @param sk A sketch of the entire data set. + * @return the final partitioning list + */ + public List<PartitionBoundsRow<T>> partition(final S sk) { + if (sk.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + final long inputN = sk.getN(); + final double guessNumParts = max(1.0, ceil((double)inputN / tgtPartitionSize)); + this.numLevels = (int)max(1, ceil(log(guessNumParts) / log(maxPartsPerSk))); + final int partsPerSk = (int)round(pow(guessNumParts, 1.0 / numLevels)); + this.partitionsPerSk = min(partsPerSk, maxPartsPerSk); + final GenericPartitionBoundaries<T> gpb = sk.getPartitionBoundaries(partitionsPerSk, criteria); + final StackElement<T> se = new StackElement<>(gpb, stack.size() + 1, 0, "1"); + stack.push(se); + partitionSearch(stack); + return finalPartitionList; + } + + private void partitionSearch(final Stack<StackElement<T>> stack) { + if (stack.isEmpty()) { + return; + } + final StackElement<T> se = stack.peek(); + final GenericPartitionBoundaries<T> gpb = se.gpb; + final int numParts = gpb.getNumPartitions(); + + if (stack.size() == numLevels) { //at max level + while (++se.part <= numParts) { //add rows to final partition list + final PartitionBoundsRow<T> row = new PartitionBoundsRow<>(se); + finalPartitionList.add(row); + } + stack.pop(); + partitionSearch(stack); + } + else { //not at max level + if (++se.part <= numParts) { + final PartitionBoundsRow<T> row = new PartitionBoundsRow<>(se); + final S sk = fillReq.getRange(row.lowerBound, row.upperBound, row.rule); + final GenericPartitionBoundaries<T> gpb2 = sk.getPartitionBoundaries(this.partitionsPerSk, criteria); + final int level = stack.size() + 1; + final String partId = se.partId + "." + se.part + "," + level; + final StackElement<T> se2 = new StackElement<>(gpb2, level, 0, partId); + stack.push(se2); + partitionSearch(stack); + } + //done with all parts at this level + if (stack.isEmpty()) { + return; + } + stack.pop(); + partitionSearch(stack); + } + } + + /** + * Holds data for a Stack element + */ + public static class StackElement<T> { + public final GenericPartitionBoundaries<T> gpb; + public int part; + public String partId; + + public StackElement(final GenericPartitionBoundaries<T> gpb, final int level, final int part, final String partId) { Review Comment: ## Useless parameter The parameter 'level' is never used. [Show more details](https://github.com/apache/datasketches-java/security/code-scanning/653) ########## src/test/java/org/apache/datasketches/quantilescommon/LongsAsOrderableStrings.java: ########## @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static java.lang.Math.ceil; +import static java.lang.Math.log; +import static org.apache.datasketches.common.Util.characterPad; + +/** + * Creates a string from a positive long value that is orderable in the + * same order as its long value. + */ +public final class LongsAsOrderableStrings { + + /** + * Converts the given long into a string with leading spaces based on the given numDigits. + * This allows the stings to be ordered as if they were longs. + * @param value the value to convert + * @param numDigits the maximum required number of total spaces for digits. + * @return the given long into a string with leading spaces + */ + public static String getString(final long value, final int numDigits) { + return characterPad(Long.toString(value), numDigits, ' ', false); + } + + /** + * Converts the given String back to a long by trimming any leading or trailing spaces. + * @param value the given string to convert + * @return the given String back to a long + */ + public static long getLong(final String value) { + return Long.parseLong(value.trim()); Review Comment: ## Missing catch of NumberFormatException Potential uncaught 'java.lang.NumberFormatException'. [Show more details](https://github.com/apache/datasketches-java/security/code-scanning/658) ########## src/test/java/org/apache/datasketches/partitions/KllItemsSketchFillRequestLongAsString.java: ########## @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_UPPER; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.digits; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString; + +import java.util.Comparator; +import java.util.Random; + +import org.apache.datasketches.common.ArrayOfStringsSerDe; +import org.apache.datasketches.kll.KllItemsSketch; + +/** + * This is an simulated data set with a given N used for testing. + * @author Lee Rhodes + */ +public class KllItemsSketchFillRequestLongAsString implements SketchFillRequest<String, KllItemsSketch<String>> { + private int k; + private int numDigits; + private Random rand = new Random(); + + public KllItemsSketchFillRequestLongAsString() { + k = 1 << 10; + numDigits = 3; + } + + public KllItemsSketchFillRequestLongAsString(final int k, final long totalN) { + this.k = k; + this.numDigits = digits(totalN); + } + + @Override + public KllItemsSketch<String> getRange(final String lowerQuantile, final String upperQuantile, + final BoundsRule bounds) { + KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), new ArrayOfStringsSerDe()); + long lower = Long.parseLong(lowerQuantile.trim()); Review Comment: ## Missing catch of NumberFormatException Potential uncaught 'java.lang.NumberFormatException'. [Show more details](https://github.com/apache/datasketches-java/security/code-scanning/656) ########## src/main/java/org/apache/datasketches/partitions/Partitioner.java: ########## @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static java.lang.Math.ceil; +import static java.lang.Math.log; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static java.lang.Math.pow; +import static java.lang.Math.round; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; +import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; +import org.apache.datasketches.quantilescommon.Stack; + +/** + * A partitioning process that can partition very large data sets into thousands to millions + * of partitions of approximately the same size. + * @param T the data type Review Comment: ## Spurious Javadoc @param tags @param tag "T" does not match any actual type parameter of type "Partitioner". [Show more details](https://github.com/apache/datasketches-java/security/code-scanning/659) ########## src/test/java/org/apache/datasketches/partitions/KllItemsSketchFillRequestLongAsString.java: ########## @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_UPPER; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.digits; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString; + +import java.util.Comparator; +import java.util.Random; + +import org.apache.datasketches.common.ArrayOfStringsSerDe; +import org.apache.datasketches.kll.KllItemsSketch; + +/** + * This is an simulated data set with a given N used for testing. + * @author Lee Rhodes + */ +public class KllItemsSketchFillRequestLongAsString implements SketchFillRequest<String, KllItemsSketch<String>> { + private int k; + private int numDigits; + private Random rand = new Random(); + + public KllItemsSketchFillRequestLongAsString() { + k = 1 << 10; + numDigits = 3; + } + + public KllItemsSketchFillRequestLongAsString(final int k, final long totalN) { + this.k = k; + this.numDigits = digits(totalN); + } + + @Override + public KllItemsSketch<String> getRange(final String lowerQuantile, final String upperQuantile, + final BoundsRule bounds) { + KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), new ArrayOfStringsSerDe()); + long lower = Long.parseLong(lowerQuantile.trim()); + long upper = Long.parseLong(upperQuantile.trim()); Review Comment: ## Missing catch of NumberFormatException Potential uncaught 'java.lang.NumberFormatException'. [Show more details](https://github.com/apache/datasketches-java/security/code-scanning/657) ########## src/main/java/org/apache/datasketches/partitions/Partitioner.java: ########## @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static java.lang.Math.ceil; +import static java.lang.Math.log; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static java.lang.Math.pow; +import static java.lang.Math.round; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; +import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; +import org.apache.datasketches.quantilescommon.Stack; + +/** + * A partitioning process that can partition very large data sets into thousands to millions + * of partitions of approximately the same size. + * @param T the data type + * @param S the quantiles sketch that implements both QuantilesGenericAPI and PartitioningFeature. Review Comment: ## Spurious Javadoc @param tags @param tag "S" does not match any actual type parameter of type "Partitioner". [Show more details](https://github.com/apache/datasketches-java/security/code-scanning/660) ########## src/test/java/org/apache/datasketches/partitions/ItemsSketchFillRequestLongAsString.java: ########## @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_UPPER; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.digits; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString; + +import java.util.Comparator; +import java.util.Random; + +import org.apache.datasketches.quantiles.ItemsSketch; + +/** + * This is an simulated data set with a given N used for testing. + * @author Lee Rhodes + */ +public class ItemsSketchFillRequestLongAsString implements SketchFillRequest<String, ItemsSketch<String>> { + private int k; + private int numDigits; + private Random rand = new Random(); + + public ItemsSketchFillRequestLongAsString() { + k = 1 << 10; + numDigits = 3; + } + + public ItemsSketchFillRequestLongAsString(final int k, final long totalN) { + this.k = k; + this.numDigits = digits(totalN); + } + + @Override + public ItemsSketch<String> getRange(final String lowerQuantile, final String upperQuantile, + final BoundsRule bounds) { + final ItemsSketch<String> sk = ItemsSketch.getInstance(String.class, k, Comparator.naturalOrder()); + final long lower = Long.parseLong(lowerQuantile.trim()); Review Comment: ## Missing catch of NumberFormatException Potential uncaught 'java.lang.NumberFormatException'. [Show more details](https://github.com/apache/datasketches-java/security/code-scanning/654) ########## src/test/java/org/apache/datasketches/partitions/ItemsSketchFillRequestLongAsString.java: ########## @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_UPPER; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.digits; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString; + +import java.util.Comparator; +import java.util.Random; + +import org.apache.datasketches.quantiles.ItemsSketch; + +/** + * This is an simulated data set with a given N used for testing. + * @author Lee Rhodes + */ +public class ItemsSketchFillRequestLongAsString implements SketchFillRequest<String, ItemsSketch<String>> { + private int k; + private int numDigits; + private Random rand = new Random(); + + public ItemsSketchFillRequestLongAsString() { + k = 1 << 10; + numDigits = 3; + } + + public ItemsSketchFillRequestLongAsString(final int k, final long totalN) { + this.k = k; + this.numDigits = digits(totalN); + } + + @Override + public ItemsSketch<String> getRange(final String lowerQuantile, final String upperQuantile, + final BoundsRule bounds) { + final ItemsSketch<String> sk = ItemsSketch.getInstance(String.class, k, Comparator.naturalOrder()); + final long lower = Long.parseLong(lowerQuantile.trim()); + final long upper = Long.parseLong(upperQuantile.trim()); Review Comment: ## Missing catch of NumberFormatException Potential uncaught 'java.lang.NumberFormatException'. [Show more details](https://github.com/apache/datasketches-java/security/code-scanning/655) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
