thomasrebele commented on code in PR #6036: URL: https://github.com/apache/hive/pull/6036#discussion_r2285953613
########## ql/src/test/org/apache/hadoop/hive/ql/exec/vector/aggregation/TestVectorAggregation.java: ########## @@ -314,19 +306,35 @@ private boolean checkDecimal64(boolean tryDecimal64, TypeInfo typeInfo) { return result; } + /** + * Generate a random number according to a distribution with the following properties: + * + * <ul> + * <li>the probability decreases linearly from 1 (most probable) to <code>maxSize</code> (least probable)</li> + * <li>1 is <emph>maxSize</emph>-times more likely than <code>maxSize</code></li> + * </ul> + * + * @return a number from 1 to <code>maxSize</code> (both inclusive) + */ public static int getLinearRandomNumber(Random random, int maxSize) { - //Get a linearly multiplied random number - int randomMultiplier = maxSize * (maxSize + 1) / 2; - int randomInt = random.nextInt(randomMultiplier); - - //Linearly iterate through the possible values to find the correct one - int linearRandomNumber = 0; - for(int i=maxSize; randomInt >= 0; i--){ - randomInt -= i; - linearRandomNumber++; - } - - return linearRandomNumber; + // Explanatory example: maxSize is 4, then the numbers 1 to 4 are distributed according to + // 1:****, 2:***, 3:**, 4:* + // The number of stars is a triangular number, so 10 in the example + int triangularNumber = maxSize * (maxSize + 1) / 2; + // Pick a random star + int randomInt = random.nextInt(triangularNumber); + + // Invert the problem: 1:*, 2:**, 3:***, 4:**** + // So in the example, star index 0 becomes index 9 and vice versa + randomInt = triangularNumber - randomInt - 1; + // Use the formula for triangular numbers to convert from the star index to the number + // n*(n+1)/2 = s + // n*n + n - 2s = 0 + // then use the greater solution of the quadratic formula + // n = ( -1 + sqrt(1-4*1*(-2s)) )/2 + int n = (int)(-1 + Math.sqrt(1+8* (randomInt)))/2; Review Comment: Thanks for spotting! Fixed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org