Github user justinleet commented on a diff in the pull request: https://github.com/apache/metron/pull/958#discussion_r173936212 --- Diff: metron-contrib/metron-performance/src/main/java/org/apache/metron/performance/sampler/BiasedSampler.java --- @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.metron.performance.sampler; + +import com.google.common.base.Splitter; +import com.google.common.collect.Iterables; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.TreeMap; + +public class BiasedSampler implements Sampler { + TreeMap<Double, Map.Entry<Integer, Integer>> discreteDistribution; + public BiasedSampler(List<Map.Entry<Integer, Integer>> discreteDistribution, int max) { + this.discreteDistribution = createDistribution(discreteDistribution, max); + } + + public static List<Map.Entry<Integer, Integer>> readDistribution(File distrFile) throws IOException { + List<Map.Entry<Integer, Integer>> ret = new ArrayList<>(); + System.out.println("Using biased sampler with the following biases:"); + try(BufferedReader br = new BufferedReader(new FileReader(distrFile))) { + int sumLeft = 0; + int sumRight = 0; + for(String line = null;(line = br.readLine()) != null;) { + if(line.startsWith("#")) { + continue; + } + Iterable<String> it = Splitter.on(",").split(line.trim()); + int left = Integer.parseInt(Iterables.getFirst(it, null)); + int right = Integer.parseInt(Iterables.getLast(it, null)); + System.out.println("\t" + left + "% of templates will comprise roughly " + right + "% of sample output"); + ret.add(new AbstractMap.SimpleEntry<>(left, right)); + sumLeft += left; + sumRight += right; + } + if(sumLeft > 100 || sumRight > 100 ) { + throw new IllegalStateException("Neither columns must sum to beyond 100. " + + "The first column is the % of templates. 
" + + "The second column is the % of the sample that % of template occupies."); + } + else if(sumLeft < 100 && sumRight < 100) { + int left = 100 - sumLeft; + int right = 100 - sumRight; + System.out.println("\t" + left + "% of templates will comprise roughly " + right + "% of sample output"); + ret.add(new AbstractMap.SimpleEntry<>(left, right)); + } + return ret; + } + } + + private static TreeMap<Double, Map.Entry<Integer, Integer>> + createDistribution(List<Map.Entry<Integer, Integer>> discreteDistribution, int max) { + TreeMap<Double, Map.Entry<Integer, Integer>> ret = new TreeMap<>(); + int from = 0; + double weight = 0.0d; + for(Map.Entry<Integer, Integer> kv : discreteDistribution) { + double pctVals = kv.getKey()/100.0; + int to = from + (int)(max*pctVals); + double pctWeight = kv.getValue()/100.0; + ret.put(weight, new AbstractMap.SimpleEntry<>(from, to)); + weight += pctWeight; + from = to; + } + return ret; + } + + @Override + public int sample(Random rng, int limit) { + double weight = rng.nextDouble(); + Map.Entry<Integer, Integer> range = discreteDistribution.floorEntry(weight).getValue(); + return rng.nextInt(range.getValue() - range.getKey()) + range.getKey(); --- End diff -- Jerks like me who provide negatives or zeroes cause ugly exceptions to start showing up, because nextInt doesn't get happy values (its bound must be positive).
---