davecromberge commented on a change in pull request #369: URL: https://github.com/apache/datasketches-java/pull/369#discussion_r741971189
########## File path: src/test/java/org/apache/datasketches/theta/CornerCaseThetaSetOperationsTest.java ########## @@ -0,0 +1,558 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta; + +import org.testng.annotations.Test; + +public class CornerCaseThetaSetOperationsTest { + + /* Hash Values + * 9223372036854775807 Theta = 1.0 + * + * 6730918654704304314 hash(3L)[0] >>> 1 GT_MIDP_V + * 4611686018427387904 Theta for p = 0.5f = MIDP + * 2206043092153046979 hash(2L)[0] >>> 1 LT_MIDP_V + * 1498732507761423037 hash(5L)[0] >>> 1 LTLT_MIDP_V + * + * 1206007004353599230 hash(6L)[0] >>> 1 GT_LOWP_V + * 922337217429372928 Theta for p = 0.1f = LOWP + * 593872385995628096 hash(4L)[0] >>> 1 LT_LOWP_V + * 405753591161026837 hash(1L)[0] >>> 1 LTLT_LOWP_V + */ + + private static final long GT_MIDP_V = 3L; + private static final float MIDP = 0.5f; + private static final long LT_MIDP_V = 2L; + + private static final long GT_LOWP_V = 6L; + private static final float LOWP = 0.1f; + private static final long LT_LOWP_V = 4L; + + private static final double MIDP_THETA = MIDP; + private static final double LOWP_THETA = LOWP; + + private enum SkType { + NEW, //{ 1.0, 0, T} Bin: 101 Oct: 05 + 
EXACT, //{ 1.0, >0, F} Bin: 110 Oct: 06, specify only value + ESTIMATION, //{<1.0, >0, F} Bin: 010 Oct: 02, specify only value + NEW_DEGEN, //{<1.0, 0, T} Bin: 001 Oct: 01, specify only p + RESULT_DEGEN //{<1.0, 0, F} Bin: 000 Oct: 0, specify p, value + } Review comment: I incorrectly interpreted the bit for empty, thanks for clarifying. ########## File path: src/test/java/org/apache/datasketches/theta/CornerCaseThetaSetOperationsTest.java ########## @@ -0,0 +1,558 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta; + +import org.testng.annotations.Test; + +public class CornerCaseThetaSetOperationsTest { + + /* Hash Values + * 9223372036854775807 Theta = 1.0 + * + * 6730918654704304314 hash(3L)[0] >>> 1 GT_MIDP_V + * 4611686018427387904 Theta for p = 0.5f = MIDP + * 2206043092153046979 hash(2L)[0] >>> 1 LT_MIDP_V + * 1498732507761423037 hash(5L)[0] >>> 1 LTLT_MIDP_V + * + * 1206007004353599230 hash(6L)[0] >>> 1 GT_LOWP_V + * 922337217429372928 Theta for p = 0.1f = LOWP + * 593872385995628096 hash(4L)[0] >>> 1 LT_LOWP_V + * 405753591161026837 hash(1L)[0] >>> 1 LTLT_LOWP_V + */ + + private static final long GT_MIDP_V = 3L; + private static final float MIDP = 0.5f; + private static final long LT_MIDP_V = 2L; + + private static final long GT_LOWP_V = 6L; + private static final float LOWP = 0.1f; + private static final long LT_LOWP_V = 4L; + + private static final double MIDP_THETA = MIDP; + private static final double LOWP_THETA = LOWP; + + private enum SkType { + NEW, //{ 1.0, 0, T} Bin: 101 Oct: 05 + EXACT, //{ 1.0, >0, F} Bin: 110 Oct: 06, specify only value + ESTIMATION, //{<1.0, >0, F} Bin: 010 Oct: 02, specify only value + NEW_DEGEN, //{<1.0, 0, T} Bin: 001 Oct: 01, specify only p + RESULT_DEGEN //{<1.0, 0, F} Bin: 000 Oct: 0, specify p, value + } + + //NOTE: 0 values in getSketch are not used. 
+ + private static void checks( + UpdateSketch thetaA, + UpdateSketch thetaB, + double resultInterTheta, + int resultInterCount, + boolean resultInterEmpty, + double resultAnotbTheta, + int resultAnotbCount, + boolean resultAnotbEmpty) { + CompactSketch csk; + + //Intersection + Intersection inter = SetOperation.builder().buildIntersection(); + + csk = inter.intersect(thetaA, thetaB); + checkResult("Intersect Stateless Theta, Theta", csk, resultInterTheta, resultInterCount, resultInterEmpty); + csk = inter.intersect(thetaA.compact(), thetaB.compact()); + checkResult("Intersect Stateless Theta, Theta", csk, resultInterTheta, resultInterCount, resultInterEmpty); + + //AnotB + AnotB anotb = SetOperation.builder().buildANotB(); + + csk = anotb.aNotB(thetaA, thetaB); + checkResult("AnotB Stateless Theta, Theta", csk, resultAnotbTheta, resultAnotbCount, resultAnotbEmpty); + csk = anotb.aNotB(thetaA.compact(), thetaB.compact()); + checkResult("AnotB Stateless Theta, Theta", csk, resultAnotbTheta, resultAnotbCount, resultAnotbEmpty); + + anotb.setA(thetaA); + anotb.notB(thetaB); + csk = anotb.getResult(true); + checkResult("AnotB Stateful Theta, Theta", csk, resultAnotbTheta, resultAnotbCount, resultAnotbEmpty); + + anotb.setA(thetaA.compact()); + anotb.notB(thetaB.compact()); + csk = anotb.getResult(true); + checkResult("AnotB Stateful Theta, Theta", csk, resultAnotbTheta, resultAnotbCount, resultAnotbEmpty); + } + + + @Test + public void newNew() { + UpdateSketch thetaA = getSketch(SkType.NEW, 0, 0); + UpdateSketch thetaB = getSketch(SkType.NEW, 0, 0); + final double resultInterTheta = 1.0; + final int resultInterCount = 0; + final boolean resultInterEmpty = true; + final double resultAnotbTheta = 1.0; + final int resultAnotbCount = 0; + final boolean resultAnotbEmpty = true; + + checks(thetaA, thetaB, resultInterTheta, resultInterCount, resultInterEmpty, + resultAnotbTheta, resultAnotbCount, resultAnotbEmpty); + } + + @Test + public void newExact() { + UpdateSketch 
thetaA = getSketch(SkType.NEW, 0, 0); + UpdateSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_V); + final double resultInterTheta = 1.0; + final int resultInterCount = 0; + final boolean resultInterEmpty = true; + final double resultAnotbTheta = 1.0; + final int resultAnotbCount = 0; + final boolean resultAnotbEmpty = true; + + checks(thetaA, thetaB, resultInterTheta, resultInterCount, resultInterEmpty, + resultAnotbTheta, resultAnotbCount, resultAnotbEmpty); + } Review comment: Makes sense, thanks. ########## File path: src/main/java/org/apache/datasketches/tuple/AnotB.java ########## @@ -143,19 +142,58 @@ public void setA(final Sketch<S> skA) { * * @param skB The incoming Tuple sketch for the second (or following) argument <i>B</i>. */ + @SuppressWarnings("unchecked") public void notB(final Sketch<S> skB) { - if (empty_ || skB == null || skB.isEmpty() || hashArr_ == null) { return; } - //skB is not empty - final long thetaLongB = skB.getThetaLong(); - thetaLong_ = Math.min(thetaLong_, thetaLongB); - - //process B - final DataArrays<S> daB = getResultArraysTuple(thetaLong_, curCount_, hashArr_, summaryArr_, skB); - hashArr_ = daB.hashArr; - summaryArr_ = daB.summaryArr; + if (skB == null) { return; } //ignore - curCount_ = hashArr_.length; - empty_ = curCount_ == 0 && thetaLong_ == Long.MAX_VALUE; + final long thetaLongB = skB.getThetaLong(); + final int countB = skB.getRetainedEntries(); + final boolean emptyB = skB.isEmpty(); + + final int id = + SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB); + final CornerCase cCase = CornerCase.idToCornerCase(id); + final AnotbResult anotbResult = cCase.getAnotbResult(); + + switch (anotbResult) { + case NEW_1_0_T: { + reset(); + break; Review comment: Thanks for the additional details, I understand the reason now. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
