dcapwell commented on code in PR #83:
URL: https://github.com/apache/cassandra-accord/pull/83#discussion_r1511738613


##########
accord-core/src/test/java/accord/utils/Gens.java:
##########
@@ -116,6 +125,253 @@ public static Gen.IntGen pickZipf(int[] array)
         };
     }
 
+    public static Gen.LongGen pickZipf(long[] array)
+    {
+        if (array == null || array.length == 0)
+            throw new IllegalArgumentException("Empty array given");
+        if (array.length == 1)
+            return ignore -> array[0];
+        BigDecimal[] weights = new BigDecimal[array.length];
+        BigDecimal base = BigDecimal.valueOf(Math.pow(2, array.length));
+        weights[0] = base;
+        for (int i = 1; i < array.length; i++)
+            weights[i] = base.divide(BigDecimal.valueOf(i + 1), 
RoundingMode.UP);
+        BigDecimal totalWeights = Stream.of(weights).reduce(BigDecimal.ZERO, 
BigDecimal::add);
+
+        return rs -> {
+            BigDecimal value = 
BigDecimal.valueOf(rs.nextDouble()).multiply(totalWeights);
+            for (int i = 0; i < weights.length; i++)
+            {
+                value = value.subtract(weights[i]);
+                if (value.compareTo(BigDecimal.ZERO) <= 0)
+                    return array[i];
+            }
+            return array[array.length - 1];
+        };
+    }
+
+    public static <T> Gen<T> pickZipf(T... array)
+    {
+        return pickZipf(Arrays.asList(array));
+    }
+
+    public static <T> Gen<T> pickZipf(List<T> array)
+    {
+        if (array == null || array.isEmpty())
+            throw new IllegalArgumentException("Empty array given");
+        if (array.size() == 1)
+            return ignore -> array.get(0);
+        BigDecimal[] weights = new BigDecimal[array.size()];
+        BigDecimal base = BigDecimal.valueOf(Math.pow(2, array.size()));
+        weights[0] = base;
+        for (int i = 1; i < array.size(); i++)
+            weights[i] = base.divide(BigDecimal.valueOf(i + 1), 
RoundingMode.UP);
+        BigDecimal totalWeights = Stream.of(weights).reduce(BigDecimal.ZERO, 
BigDecimal::add);
+
+        return rs -> {
+            BigDecimal value = 
BigDecimal.valueOf(rs.nextDouble()).multiply(totalWeights);
+            for (int i = 0; i < weights.length; i++)
+            {
+                value = value.subtract(weights[i]);
+                if (value.compareTo(BigDecimal.ZERO) <= 0)
+                    return array.get(i);
+            }
+            return array.get(array.size() - 1);
+        };
+    }
+
+    public static Gen<Gen.IntGen> randomWeights(int[] array)
+    {
+        return rs -> {
+            float[] weights = Picker.randomWeights(rs, array.length);
+            return r -> array[index(r, weights)];
+        };
+    }
+
+    public static Gen<Gen.LongGen> randomWeights(long[] array)
+    {
+        return rs -> {
+            float[] weights = Picker.randomWeights(rs, array.length);
+            return r -> array[index(r, weights)];
+        };
+    }
+
+    public static <T> Gen<Gen<T>> randomWeights(T[] array)
+    {
+        return rs -> {
+            float[] weights = Picker.randomWeights(rs, array.length);
+            return r -> array[index(r, weights)];
+        };
+    }
+
+    public static <T> Gen<Gen<T>> randomWeights(List<T> array)
+    {
+        return rs -> {
+            float[] weights = Picker.randomWeights(rs, array.size());
+            return r -> array.get(index(r, weights));
+        };
+    }
+
+    private static int index(RandomSource rs, float[] weights)
+    {
+        int i = Arrays.binarySearch(weights, rs.nextFloat());
+        if (i < 0) i = -1 - i;
+        return i;
+    }
+
+    public static Gen<Gen.IntGen> mixedDistribution(int minInclusive, int 
maxExclusive)
+    {
+        int domainSize = (maxExclusive - minInclusive + 1);
+        if (domainSize < 0)
+            throw new IllegalArgumentException("Range is too large; min=" + 
minInclusive + ", max=" + maxExclusive);
+        int[] array, indexes;
+        if (domainSize > 200) // randomly selected
+        {
+            int numBuckets = 10;

Review Comment:
   When it comes to `randomWeights` and `pickZipf`, larger arrays don't make 
much sense, so this applies the bias to a smaller array and generates values 
within that section of the range.  
   
   I could make this configurable, but I didn't need to, so it can be refactored later if needed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to