JAMES-2290 generate a random stream of elements respecting distribution constraints
Project: http://git-wip-us.apache.org/repos/asf/james-project/repo Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/0a2a7348 Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/0a2a7348 Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/0a2a7348 Branch: refs/heads/master Commit: 0a2a7348a07a5660d8fc94e88417825ce080e74a Parents: 4724846 Author: Matthieu Baechler <[email protected]> Authored: Wed Jan 17 17:01:59 2018 +0100 Committer: benwa <[email protected]> Committed: Fri Jan 19 18:57:57 2018 +0700 ---------------------------------------------------------------------- server/testing/pom.xml | 10 ++ .../james/utils/DiscreteDistribution.java | 83 +++++++++++++ .../james/utils/DiscreteDistributionTest.java | 115 +++++++++++++++++++ 3 files changed, 208 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/james-project/blob/0a2a7348/server/testing/pom.xml ---------------------------------------------------------------------- diff --git a/server/testing/pom.xml b/server/testing/pom.xml index c42f564..eadf713 100644 --- a/server/testing/pom.xml +++ b/server/testing/pom.xml @@ -58,10 +58,20 @@ <artifactId>guava</artifactId> </dependency> <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <version>3.4.1</version> + </dependency> + <dependency> <groupId>commons-net</groupId> <artifactId>commons-net</artifactId> </dependency> <dependency> + <groupId>io.vavr</groupId> + <artifactId>vavr</artifactId> + <version>0.9.2</version> + </dependency> + <dependency> <groupId>org.assertj</groupId> <artifactId>assertj-core</artifactId> <scope>test</scope> http://git-wip-us.apache.org/repos/asf/james-project/blob/0a2a7348/server/testing/src/main/java/org/apache/james/utils/DiscreteDistribution.java ---------------------------------------------------------------------- diff --git 
a/server/testing/src/main/java/org/apache/james/utils/DiscreteDistribution.java b/server/testing/src/main/java/org/apache/james/utils/DiscreteDistribution.java new file mode 100644 index 0000000..a21a3ca --- /dev/null +++ b/server/testing/src/main/java/org/apache/james/utils/DiscreteDistribution.java @@ -0,0 +1,83 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. 
* + ****************************************************************/

package org.apache.james.utils;

import java.util.List;

import org.apache.commons.math3.distribution.EnumeratedDistribution;
import org.apache.commons.math3.util.Pair;

import com.github.steveash.guavate.Guavate;
import com.google.common.base.Preconditions;

import io.vavr.collection.Stream;

/**
 * Draws random values of type {@code T} according to caller-supplied weights.
 *
 * <p>Instances are obtained through {@link #create(List)}. Sampling is delegated to a
 * commons-math {@link EnumeratedDistribution} built from the supplied entries.
 *
 * @param <T> type of the sampled values
 */
public class DiscreteDistribution<T> {

    /**
     * A value together with the weight it should have in the distribution.
     *
     * @param <V> type of the value
     */
    public static class DistributionEntry<V> {
        private final V value;
        private final double associatedProbability;

        /**
         * Creates an entry.
         *
         * @param value the value to sample; must not be {@code null}
         * @param associatedProbability weight of the value; must be finite and not negative
         * @throws IllegalArgumentException if {@code value} is {@code null}, or if
         *         {@code associatedProbability} is negative, infinite or NaN
         */
        public DistributionEntry(V value, double associatedProbability) {
            Preconditions.checkArgument(value != null, "The value of a distribution entry must not be null");
            // Zero is deliberately accepted: such an entry is legal, it is simply never drawn.
            // Infinite weights are refused here rather than later, when commons-math would reject them.
            Preconditions.checkArgument(Double.isFinite(associatedProbability) && associatedProbability >= 0,
                "The occurrence count needs to be a finite, non-negative number but was %s", associatedProbability);
            this.value = value;
            this.associatedProbability = associatedProbability;
        }

        /** Returns the value carried by this entry. */
        public V getValue() {
            return value;
        }

        /** Returns the weight given to the value when the entry was created. */
        public double getAssociatedProbability() {
            return associatedProbability;
        }

        /** Converts this entry to the {@link Pair} form consumed by {@link EnumeratedDistribution}. */
        public Pair<V, Double> toPair() {
            return new Pair<>(value, associatedProbability);
        }
    }

    /**
     * Builds a distribution from the given entries.
     *
     * <p>The same value may appear in several entries. Weights do not have to sum to one;
     * commons-math normalizes them.
     *
     * @param distribution entries to sample from
     * @param <T> type of the sampled values
     * @return a distribution backed by the given entries
     * @throws IllegalArgumentException if the weights do not add up to a strictly positive number,
     *         which covers both an empty list and a list holding only zero-weight entries
     * @throws NullPointerException if {@code distribution} is {@code null}
     */
    public static <T> DiscreteDistribution<T> create(List<DistributionEntry<T>> distribution) {
        double totalOccurrenceCount = distribution.stream()
            .mapToDouble(DistributionEntry::getAssociatedProbability)
            .sum();

        Preconditions.checkArgument(totalOccurrenceCount > 0, "You need to have some entries with non-zero occurrence count in your distribution");
        return new DiscreteDistribution<>(distribution);
    }

    private final EnumeratedDistribution<T> enumeratedDistribution;

    private DiscreteDistribution(List<DistributionEntry<T>> distribution) {
        enumeratedDistribution = new EnumeratedDistribution<>(distribution.stream()
            .map(DistributionEntry::toPair)
            .collect(Guavate.toImmutableList()));
    }

    /**
     * Returns an endless, lazily evaluated stream in which every element comes from a call to
     * {@link #sample()}. Callers must bound it themselves, for instance with {@code take(n)}.
     */
    public Stream<T> generateRandomStream() {
        return Stream.continually(this::sample);
    }

    /** Draws a single value from the underlying distribution. */
    public T sample() {
        return enumeratedDistribution.sample();
    }

}
http://git-wip-us.apache.org/repos/asf/james-project/blob/0a2a7348/server/testing/src/test/java/org/apache/james/utils/DiscreteDistributionTest.java ---------------------------------------------------------------------- diff --git a/server/testing/src/test/java/org/apache/james/utils/DiscreteDistributionTest.java b/server/testing/src/test/java/org/apache/james/utils/DiscreteDistributionTest.java new file mode 100644 index 0000000..ecc63b4 --- /dev/null +++ b/server/testing/src/test/java/org/apache/james/utils/DiscreteDistributionTest.java @@ -0,0 +1,115 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. 
* + ****************************************************************/ + +package org.apache.james.utils; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.apache.james.utils.DiscreteDistribution.DistributionEntry; +import org.assertj.core.data.Offset; +import org.junit.jupiter.api.Test; + +import com.google.common.collect.ImmutableList; + +class DiscreteDistributionTest { + + @Test + void createShouldNotSupportNegativeDistribution() { + assertThatThrownBy(() -> new DistributionEntry<>("a", -1)) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + void createShouldSupportZeroDistribution() { + DiscreteDistribution<String> testee = DiscreteDistribution.create( + ImmutableList.of( + new DistributionEntry<>("a", 0), + new DistributionEntry<>("b", 1))); + + assertThat(testee.generateRandomStream().take(10)).containsOnly("b"); + } + + @Test + void createShouldNotSupportEmptyDistribution() { + assertThatThrownBy(() -> DiscreteDistribution.create(ImmutableList.of())) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + void createShouldNotSupportEffectivelyEmptyDistribution() { + assertThatThrownBy(() -> DiscreteDistribution.create( + ImmutableList.of( + new DistributionEntry<>("a", 0), + new DistributionEntry<>("b", 0)))) + .isInstanceOf(IllegalArgumentException.class); + } + + + @Test + void streamOfSingleDistributionMapShouldAlwaysReturnSameElement() { + DiscreteDistribution<String> testee = DiscreteDistribution.create( + ImmutableList.of( + new DistributionEntry<>("a", 1))); + + assertThat(testee.generateRandomStream().take(10)).containsOnly("a"); + } + + @Test + void streamOfEvenDistributionMapShouldReturnSameNumberOfEachElement() { + DiscreteDistribution<String> testee =DiscreteDistribution.create( + ImmutableList.of( + new DistributionEntry<>("a", 10), + 
new DistributionEntry<>("b", 10))); + + Map<String, Long> experimentOutcome = testee.generateRandomStream().take(1_000_000) + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + assertThat(experimentOutcome.get("a")).isCloseTo(experimentOutcome.get("b"), Offset.offset(5_000L)); + } + + @Test + void streamOfSpecificDistributionMapShouldReturnTwiceMoreA() { + DiscreteDistribution<String> testee = DiscreteDistribution.create( + ImmutableList.of( + new DistributionEntry<>("a", 20), + new DistributionEntry<>("b", 10))); + + Map<String, Long> experimentOutcome = testee.generateRandomStream().take(1_000_000) + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + assertThat(experimentOutcome.get("a")).isCloseTo(experimentOutcome.get("b") * 2, Offset.offset(5_000L)); + } + + @Test + void partitionShouldSupportDuplicatedDistributionEntry() { + DiscreteDistribution<String> testee = DiscreteDistribution.create( + ImmutableList.of( + new DistributionEntry<>("a", 10), + new DistributionEntry<>("b", 10), + new DistributionEntry<>("a", 10))); + + Map<String, Long> experimentOutcome = testee.generateRandomStream().take(1_000_000) + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + assertThat(experimentOutcome.get("a")).isCloseTo(experimentOutcome.get("b") * 2, Offset.offset(5_000L)); + } + +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
