dcapwell commented on code in PR #50: URL: https://github.com/apache/cassandra-accord/pull/50#discussion_r1262791829
########## accord-core/src/test/java/accord/utils/SimpleBitSetTest.java: ########## @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package accord.utils; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; +import java.util.Random; +import java.util.function.BiConsumer; +import java.util.function.IntConsumer; + +import com.google.common.collect.Lists; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class SimpleBitSetTest +{ + private static final int NOT_FOUND = Integer.MAX_VALUE; + + private static class Check + { + final SimpleBitSet test; + final BitSet canon; + final int size; + + private Check(SimpleBitSet test, BitSet canon, int size) + { + this.test = test; + this.canon = canon; + this.size = size; + } + + void check(Random random) + { + assertEquals(canon.cardinality(), test.setBitCount()); + assertEquals(canon.nextSetBit(0), test.firstSetBit()); + assertEquals(normaliseNotFound(canon.nextSetBit(0)), test.firstSetBit(NOT_FOUND)); + assertEquals(canon.previousSetBit(size), test.lastSetBit()); + assertEquals(normaliseNotFound(canon.previousSetBit(size)), test.lastSetBit(NOT_FOUND)); + + forIndices(random, i -> assertEquals(canon.nextSetBit(i), test.nextSetBit(i))); + forIndices(random, i -> assertEquals(normaliseBefore(canon.nextSetBit(0), i), test.firstSetBitBefore(i))); + forIndices(random, i -> assertEquals(normaliseNotFoundBefore(canon.nextSetBit(0), i), test.firstSetBitBefore(i, NOT_FOUND))); + forRanges(random, (i, j) -> assertEquals(normaliseBefore(canon.nextSetBit(i), j), test.nextSetBitBefore(i, j))); + forRanges(random, (i, j) -> assertEquals(normaliseNotFoundBefore(canon.nextSetBit(i), j), test.nextSetBitBefore(i, j, NOT_FOUND))); + + forIndices(random, i -> assertEquals(i == 0 ? -1 : canon.previousSetBit(i - 1), test.prevSetBit(i))); + forIndices(random, i -> assertEquals(normaliseNotBefore(size == 0 ? -1 : canon.previousSetBit(size - 1), i), test.lastSetBitNotBefore(i))); + forIndices(random, i -> assertEquals(normaliseNotFoundNotBefore(size == 0 ? -1 : canon.previousSetBit(size - 1), i), test.lastSetBitNotBefore(i, NOT_FOUND))); + forRanges(random, (i, j) -> assertEquals(normaliseNotBefore(j == 0 ? -1 : canon.previousSetBit(j - 1), i), test.prevSetBitNotBefore(j, i))); + forRanges(random, (i, j) -> assertEquals(normaliseNotFoundNotBefore(j == 0 ? -1 : canon.previousSetBit(j - 1), i), test.prevSetBitNotBefore(j, i, NOT_FOUND))); + + List<Integer> canonCollect = new ArrayList<>(), testCollect = new ArrayList<>(); + canon.stream().forEachOrdered(canonCollect::add); + test.forEach(testCollect, List::add); + assertEquals(canonCollect, testCollect); + + canonCollect = Lists.reverse(canonCollect); + testCollect.clear(); + test.reverseForEach(testCollect, List::add); + assertEquals(canonCollect, testCollect); + } + + void forIndices(Random random, IntConsumer consumer) + { + for (int c = 0 ; c < 100 ; ++c) + { + int i = random.nextInt(size + 1); + consumer.accept(i); + } + } + + void forRanges(Random random, BiConsumer<Integer, Integer> consumer) + { + for (int c = 0 ; c < 100 ; ++c) + { + int i = random.nextInt(size + 1); + int j = random.nextInt(size + 1); + if (i > j) { int t = i; i = j; j = t; } + consumer.accept(i, j); + } + } + + static Check generate(Random random, int maxSize, int modCount, int runLength, float runChance, float clearChance) + { + int size = random.nextInt(maxSize); + runLength = Math.min(size, runLength); + BitSet canon = new BitSet(size); + SimpleBitSet test = new SimpleBitSet(size); + if (size > 0) + { + while (modCount-- > 0) + { + boolean set = random.nextFloat() >= clearChance; + boolean run = runLength > 1 && random.nextFloat() < runChance; + if (run) + { + int i = random.nextInt(size); + int j = random.nextInt(size); + if (j < i) { int t = i; i = j; j = t; } + j = Math.min(i + 1 + random.nextInt(runLength - 1), j); + + if (set) + { + canon.set(i, j); + test.setRange(i, j); + } + else + { + canon.clear(i, j); + while (i < j) + test.unset(i++); + } + } + else + { + int i = random.nextInt(size); + if (set) + { + assertEquals(!canon.get(i), test.set(i)); + canon.set(i); + } + else + { + assertEquals(canon.get(i), test.unset(i)); + canon.clear(i); + } + } + assertEquals(canon.cardinality(), test.setBitCount()); + } + } + return new Check(test, canon, size); + } + } + + @Test + public void testRandomBitSets() + { + Random random = new Random(); + long seed = random.nextLong(); + System.err.println("Seed: " + seed); + random.setSeed(seed); + testRandomBitSets(random, 100000); Review Comment: > I don't think this is a huge amount of code duplication though, if you prefer catching and throwing the seed. Just a couple of lines. So I'm also happy to write the tests this way if that's how you prefer I think this gets back to a drastic difference between the two of us. I strongly feel duplication is a waste and a drag on the code, and you seem to feel it's desirable; in this case I have yet to see any value in the duplication. > though we should really address why you are finding our logs so mangled. I honestly don't see anything here to address... we could change how we log everything, but that doesn't solve the problem as JUnit has to send this all back to a text file that has to be parsed, so it has to make trade offs... By relying on exceptions rather than logs, you improve the user experience for every developer on the project as they don't have to read the code then try to find the log; the seed is right there when you throw without any searching... > My preferred approach is to create some class that pairs some test with some canon(ical) version, and to randomise the generation and verification of these within the class I have done this many times. For Accord I wrote a fuzz test that generates valid Accord txn, queries C*, then validates the result against a model I built... I don't see how the qt apis make this harder than hand rolling; I personally found it faster as I had so much infrastructure in place to built up from (see `Gens`, `Generators`, `CassandraGenerators`, and `AbstractTypeGenerators`)! As I wrote 90% of this I am super familiar, so there is a bias I have to acknowledge here. > There should then ideally be some meta-randomisation around the parameters to each of these phases, particularly the generation phase. this is what qt does... > I think the qt abstraction encourages relatively narrow tests, which does permit for a quicker broader array of unique tests to be written, but the tests themselves tend to each be comparatively narrow, so there is less multiplexing of behaviour. This 100% depends on the author; you can write very narrow tests, or very complex ones... I do not see what in the API makes this harder than hand rolling? You have raw access to `Random` (which is what your test uses), and you have primitives to generate lists of arguments; what you do in the test is 100% up to the author. > It also seems to make it less natural to express meta-randomisation, though in principle it might be possible to do that for you, this is something that should really be expressed by the test author and any API we encourage should make that a core feature/requirement. I still don't see how this isn't a core feature... but would love to see what kind of API you desire that solves this... every example (outside of Simulator) in Accord and C* I have seen could be rewritten to the qt model without issues. > I would be in favour of producing some better tools for meta-randomisation, where we want to declare not only the range of values but the ways in which they are randomised (rather than simply uniform over a statically defined range). I'm not sure exactly what that would look like, but this would be the most valuable tooling for improving fuzz testing IMO. Would love to see your API as I already see what you are saying existing in the qt library today... maybe if you showed such an API (without implementation) I could really see the gap you are trying to express. Now, navigating back to this ticket and away from the general conversation about fuzz testing... for this ticket you can solve this problem by throws but I strongly prefer you to reuse code rather than rewrite when this isn't needed; I will not block on this, but do feel its not valuable code to have. I am +0 if you make the original code correct vs use the qt library. Now, after this ticket, I would love to see your ideas around fuzzing as its really not clear to me how your views do not 100% align, so dying to see the delta and ways we can improve for Accord and C* itself. There is a lot of moving pieces right now doing random/fuzz testing in C*, so general solutions can help make all these moving pieces better. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]

