JayajP commented on code in PR #28995:
URL: https://github.com/apache/beam/pull/28995#discussion_r1360911010
##########
sdks/java/core/src/main/java/org/apache/beam/sdk/util/HistogramData.java:
##########
@@ -77,6 +78,54 @@ public static HistogramData linear(double start, double
width, int numBuckets) {
return new HistogramData(LinearBuckets.of(start, width, numBuckets));
}
+ /**
+ * Returns a histogram object with exponential boundaries. The input
parameter {@code scale}
+ * determines a coefficient 'base' which specifies bucket boundaries.
+ *
+ * <pre>
+ * base = 2**(2**(-scale)) e.g.
+ * scale=1 => base=2**(1/2)=sqrt(2)
+ * scale=0 => base=2**(1)=2
+ * scale=-1 => base=2**(2)=4
+ * </pre>
+ *
+ * This bucketing strategy makes it simple/numerically stable to compute
bucket indexes for
+ * datapoints.
+ *
+ * <pre>
+ * Bucket boundaries are given by the following table where n=numBuckets.
+ * | 'Bucket Index' | Bucket Boundaries |
+ * |---------------|---------------------|
+ * | Underflow | (-inf, 0) |
+ * | 0 | [0, base) |
+ * | 1 | [base, base^2) |
+ * | 2 | [base^2, base^3) |
+ * | i | [base^i, base^(i+1))|
+ * | n-1 | [base^(n-1), base^n)|
+ * | Overflow | [base^n, inf) |
+ * </pre>
+ *
+ * <pre>
+ * Example scale/boundaries:
+ * When scale=1, buckets 0,1,2...i have lowerbounds 0, 2^(1/2), 2^(2/2), ...
2^(i/2).
+ * When scale=0, buckets 0,1,2...i have lowerbounds 0, 2, 2^2, ... 2^(i).
+ * When scale=-1, buckets 0,1,2...i have lowerbounds 0, 4, 4^2, ... 4^(i).
+ * </pre>
+ *
+ * Scale parameter is similar to OpenTelemetry's notion of
ExponentialHistogram.
Review Comment:
Done.
##########
sdks/java/core/src/main/java/org/apache/beam/sdk/util/HistogramData.java:
##########
@@ -77,6 +78,54 @@ public static HistogramData linear(double start, double
width, int numBuckets) {
return new HistogramData(LinearBuckets.of(start, width, numBuckets));
}
+ /**
+ * Returns a histogram object with exponential boundaries. The input
parameter {@code scale}
+ * determines a coefficient 'base' which specifies bucket boundaries.
+ *
+ * <pre>
+ * base = 2**(2**(-scale)) e.g.
+ * scale=1 => base=2**(1/2)=sqrt(2)
+ * scale=0 => base=2**(1)=2
+ * scale=-1 => base=2**(2)=4
+ * </pre>
+ *
+ * This bucketing strategy makes it simple/numerically stable to compute
bucket indexes for
+ * datapoints.
+ *
+ * <pre>
+ * Bucket boundaries are given by the following table where n=numBuckets.
+ * | 'Bucket Index' | Bucket Boundaries |
+ * |---------------|---------------------|
+ * | Underflow | (-inf, 0) |
+ * | 0 | [0, base) |
+ * | 1 | [base, base^2) |
+ * | 2 | [base^2, base^3) |
+ * | i | [base^i, base^(i+1))|
+ * | n-1 | [base^(n-1), base^n)|
+ * | Overflow | [base^n, inf) |
+ * </pre>
+ *
+ * <pre>
+ * Example scale/boundaries:
Review Comment:
Done.
##########
sdks/java/core/src/main/java/org/apache/beam/sdk/util/HistogramData.java:
##########
@@ -227,6 +276,133 @@ public interface BucketType extends Serializable {
double getAccumulatedBucketSize(int endIndex);
}
+ @AutoValue
+ public abstract static class ExponentialBuckets implements BucketType {
+
+ public abstract double getBase();
+
+ public abstract int getScale();
+
+ /**
+ * Set to 2**scale which is equivalent to 1/log_2(base). Precomputed to
use in {@code
+ * getBucketIndexPositiveScale}
+ */
+ public abstract double getInvLog2GrowthFactor();
+
+ @Override
+ public abstract int getNumBuckets();
+
+ /* Precomputed since this value is used every time a datapoint is recorded.
*/
+ @Override
+ public abstract double getRangeTo();
+
+ public static ExponentialBuckets of(int scale, int numBuckets) {
+ if (scale < -3) {
+ throw new IllegalArgumentException(
+ String.format("Scale should be greater than -3: %d", scale));
+ }
+
+ if (scale > 3) {
+ throw new IllegalArgumentException(String.format("Scale should be less
than 3: %d", scale));
+ }
+ if (numBuckets <= 0) {
+ throw new IllegalArgumentException(
+ String.format("numBuckets should be greater than zero: %d",
numBuckets));
+ }
+
+ double invLog2GrowthFactor = Math.pow(2, scale);
+ double base = Math.pow(2, Math.pow(2, -scale));
+ int clippedNumBuckets = ExponentialBuckets.computeNumberOfBuckets(scale,
numBuckets);
+ double rangeTo = Math.pow(base, clippedNumBuckets);
+ return new AutoValue_HistogramData_ExponentialBuckets(
+ base, scale, invLog2GrowthFactor, clippedNumBuckets, rangeTo);
+ }
+
+ /**
+ * numBuckets is clipped so that the largest bucket's lower bound is not
greater than 2^32-1
+ * (uint32 max).
+ */
+ private static int computeNumberOfBuckets(int scale, int inputNumBuckets) {
+ if (scale == 0) {
+ // When base=2 then the bucket at index 31 contains [2^31, 2^32).
+ return Math.min(32, inputNumBuckets);
Review Comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]