github-advanced-security[bot] commented on code in PR #15340: URL: https://github.com/apache/druid/pull/15340#discussion_r1443541142
########## extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramComplexMetricSerde.java: ########## @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import org.apache.druid.data.input.InputRow; +import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.column.ColumnBuilder; +import org.apache.druid.segment.data.ObjectStrategy; +import org.apache.druid.segment.serde.ComplexMetricExtractor; +import org.apache.druid.segment.serde.ComplexMetricSerde; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.nio.ByteBuffer; + +public class SpectatorHistogramComplexMetricSerde extends ComplexMetricSerde +{ + private static final SpectatorHistogramObjectStrategy STRATEGY = new SpectatorHistogramObjectStrategy(); + private final String typeName; + + SpectatorHistogramComplexMetricSerde(String type) + { + this.typeName = type; + } + + @Override + public String getTypeName() + { + return typeName; + } + + @Override + public ComplexMetricExtractor getExtractor() + { + return new ComplexMetricExtractor() + { + @Override + public Class<SpectatorHistogram> extractedClass() + { + return SpectatorHistogram.class; + } + + @Override + public Object extractValue(final InputRow inputRow, final String metricName) + { + final Object object = inputRow.getRaw(metricName); + if (object == null || object instanceof SpectatorHistogram || object instanceof Number) { + return object; + } + if (object instanceof String) { + String objectString = (String) object; + // Ignore empty values + if (objectString.trim().isEmpty()) { + return null; + } + // Treat as long number, if it looks like a number + if (Character.isDigit((objectString).charAt(0))) { + return Long.parseLong((String) object); Review Comment: ## Missing catch of NumberFormatException Potential uncaught 'java.lang.NumberFormatException'. [Show more details](https://github.com/apache/druid/security/code-scanning/5964) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java: ########## @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramTest +{ + @Test + public void testToBytesSmallValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.insert(10); + histogram.insert(30); + histogram.insert(40); + histogram.insert(40); + histogram.insert(40); + histogram.insert(50); + histogram.insert(50); + // Check the full range of bucket IDs still work + long bigValue = PercentileBuckets.get(270); + histogram.insert(bigValue); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 8, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = 0; + Assert.assertEquals("Should compact small values within key bytes", 5 * (keySize + valSize), bytes.length); Review Comment: ## Result of multiplication cast to wider type Potential overflow in [int multiplication](1) before it is converted to long by use in an invocation context. [Show more details](https://github.com/apache/druid/security/code-scanning/5967) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java: ########## @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramTest +{ + @Test + public void testToBytesSmallValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.insert(10); + histogram.insert(30); + histogram.insert(40); + histogram.insert(40); + histogram.insert(40); + histogram.insert(50); + histogram.insert(50); + // Check the full range of bucket IDs still work + long bigValue = PercentileBuckets.get(270); + histogram.insert(bigValue); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 8, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = 0; + Assert.assertEquals("Should compact small values within key bytes", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(3L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(2L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(bigValue))); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 8, deserialized.getSum()); + } + + @Test + public void testToBytesSmallishValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 64L); + histogram.add(PercentileBuckets.indexOf(30), 127L); + histogram.add(PercentileBuckets.indexOf(40), 111L); + histogram.add(PercentileBuckets.indexOf(50), 99L); + histogram.add(270, 100L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 501, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Byte.BYTES; + Assert.assertEquals("Should compact small values to a byte", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(64L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(127L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(111L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(99L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(100L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 501, deserialized.getSum()); + } + + @Test + public void testToBytesMedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 512L); + histogram.add(PercentileBuckets.indexOf(30), 1024L); + histogram.add(PercentileBuckets.indexOf(40), 2048L); + histogram.add(PercentileBuckets.indexOf(50), 4096L); + histogram.add(270, 8192L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Short.BYTES; + Assert.assertEquals("Should compact medium values to short", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(512L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1024L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(2048L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(4096L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(8192L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, deserialized.getSum()); + } + + @Test + public void testToBytesLargerValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 100000L); + histogram.add(PercentileBuckets.indexOf(30), 200000L); + histogram.add(PercentileBuckets.indexOf(40), 500000L); + histogram.add(PercentileBuckets.indexOf(50), 10000000L); + histogram.add(270, 50000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 60800000, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Integer.BYTES; + Assert.assertEquals("Should compact larger values to integer", 5 * (keySize + valSize), bytes.length); Review Comment: ## Result of multiplication cast to wider type Potential overflow in [int multiplication](1) before it is converted to long by use in an invocation context. [Show more details](https://github.com/apache/druid/security/code-scanning/5970) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java: ########## @@ -0,0 +1,733 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; +import org.apache.druid.query.QueryPlus; +import org.apache.druid.query.QueryRunner; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.apache.druid.query.Result; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.metadata.SegmentMetadataQueryConfig; +import org.apache.druid.query.metadata.SegmentMetadataQueryQueryToolChest; +import org.apache.druid.query.metadata.SegmentMetadataQueryRunnerFactory; +import org.apache.druid.query.metadata.metadata.ColumnAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.timeline.SegmentId; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class SpectatorHistogramAggregatorTest extends InitializedNullHandlingTest +{ + public static final String INPUT_DATA_PARSE_SPEC = String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"cost\"]", + " }", + "}" + ); + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + private static final SegmentMetadataQueryRunnerFactory METADATA_QR_FACTORY = new SegmentMetadataQueryRunnerFactory( + new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ); + private static final Map<String, SpectatorHistogram> EXPECTED_HISTOGRAMS = new HashMap<>(); + + static { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 1L); + EXPECTED_HISTOGRAMS.put("A", histogram); + + histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(30 + 40 + 40 + 40 + 50 + 50), 1L); + EXPECTED_HISTOGRAMS.put("B", histogram); + + histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(50 + 20000), 1L); + EXPECTED_HISTOGRAMS.put("C", histogram); + } + + private final AggregationTestHelper helper; + private final AggregationTestHelper timeSeriesHelper; + + public SpectatorHistogramAggregatorTest(final GroupByQueryConfig config) + { + SpectatorHistogramModule.registerSerde(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + module.getJacksonModules(), config, tempFolder); + timeSeriesHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper( + module.getJacksonModules(), + tempFolder + ); + } + + @Parameterized.Parameters(name = "{0}") + public static Collection<?> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { + constructors.add(new Object[]{config}); + } + return constructors; + } + + // this is to test Json properties and equals + @Test + public void serializeDeserializeFactoryWithFieldName() throws Exception + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + new SpectatorHistogramModule().getJacksonModules().forEach(objectMapper::registerModule); + SpectatorHistogramAggregatorFactory factory = new SpectatorHistogramAggregatorFactory( + "name", + "filedName", + AggregatorUtil.SPECTATOR_HISTOGRAM_CACHE_TYPE_ID + ); + AggregatorFactory other = objectMapper.readValue( + objectMapper.writeValueAsString(factory), + AggregatorFactory.class + ); + + Assert.assertEquals(factory, other); + } + + @Test + public void testBuildingHistogramQueryTime() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"longSum\", \"name\": \"cost_sum\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [\"product\"],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"cost_histogram\", \"fieldName\": " + + "\"cost_sum\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List<ResultRow> results = seq.toList(); + assertResultsMatch(results, 0, "A"); + assertResultsMatch(results, 1, "B"); + assertResultsMatch(results, 2, "C"); + } + + @Test + public void testBuildingAndMergingHistograms() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testBuildingAndMergingHistogramsTimeseriesQuery() throws Exception + { + Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + Sequence<Result<TimeseriesResultValue>> seq = (Sequence<Result<TimeseriesResultValue>>) rawseq; + List<Result<TimeseriesResultValue>> results = seq.toList(); + Assert.assertEquals(1, results.size()); + SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); + Assert.assertEquals(expected, value); + } + + @Test + public void testBuildingAndMergingGroupbyHistograms() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [\"product\"],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(6, results.size()); + + SpectatorHistogram expectedA = new SpectatorHistogram(); + expectedA.add(PercentileBuckets.indexOf(10), 1L); + Assert.assertEquals(expectedA, results.get(0).get(1)); + + SpectatorHistogram expectedB = new SpectatorHistogram(); + expectedB.add(PercentileBuckets.indexOf(30), 1L); + expectedB.add(PercentileBuckets.indexOf(40), 3L); + expectedB.add(PercentileBuckets.indexOf(50), 2L); + Assert.assertEquals(expectedB, results.get(1).get(1)); + + SpectatorHistogram expectedC = new SpectatorHistogram(); + expectedC.add(PercentileBuckets.indexOf(50), 1L); + expectedC.add(PercentileBuckets.indexOf(20000), 1L); + Assert.assertEquals(expectedC, results.get(2).get(1)); + + Assert.assertNull(results.get(3).get(1)); + Assert.assertNull(results.get(4).get(1)); + Assert.assertNull(results.get(5).get(1)); + } + + @Test + public void testBuildingAndCountingHistograms() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " + + "\"histogram\"},", + " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check longSum + Assert.assertEquals(9L, results.get(0).get(0)); + // Check doubleSum + Assert.assertEquals(9.0, (Double) results.get(0).get(1), 0.001); + } + + @Test + public void testBuildingAndCountingHistogramsWithNullFilter() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " + + "\"histogram\"},", + " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"],", + " \"filter\": {\n", + " \"fields\": [\n", + " {\n", + " \"field\": {\n", + " \"dimension\": \"histogram\",\n", + " \"value\": \"0\",\n", + " \"type\": \"selector\"\n", + " },\n", + " \"type\": \"not\"\n", + " },\n", + " {\n", + " \"field\": {\n", + " \"dimension\": \"histogram\",\n", + " \"value\": \"\",\n", + " \"type\": \"selector\"\n", + " },\n", + " \"type\": \"not\"\n", + " }\n", + " ],\n", + " \"type\": \"and\"\n", + " }", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check longSum + Assert.assertEquals(9L, results.get(0).get(0)); + // Check doubleSum + Assert.assertEquals(9.0, (Double) results.get(0).get(1), 0.001); + } + + @Test + public void testIngestAsHistogramDistribution() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testIngestHistogramsTimer() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testIngestingPreaggregatedHistograms() throws Exception + { + Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("pre_agg_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + Sequence<Result<TimeseriesResultValue>> seq = (Sequence<Result<TimeseriesResultValue>>) rawseq; + List<Result<TimeseriesResultValue>> results = seq.toList(); + Assert.assertEquals(1, results.size()); + SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); + Assert.assertEquals(expected, value); + } + + @Test + public void testMetadataQueryTimer() throws Exception + { + File segmentDir = tempFolder.newFolder(); + helper.createIndex( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + segmentDir, + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + true + ); + + ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + module.getJacksonModules().forEach(mod -> mapper.registerModule(mod)); + IndexIO indexIO = new IndexIO( + mapper, + new ColumnConfig() {} + ); + + QueryableIndex index = indexIO.loadIndex(segmentDir); + + SegmentId segmentId = SegmentId.dummy("segmentId"); + QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner( + METADATA_QR_FACTORY, + segmentId, + new QueryableIndexSegment(index, segmentId), + null + ); + + SegmentMetadataQuery segmentMetadataQuery = Druids.newSegmentMetadataQueryBuilder() + .dataSource("test_datasource") + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .merge(true) + .build(); + List<SegmentAnalysis> results = runner.run(QueryPlus.wrap(segmentMetadataQuery)).toList(); + System.out.println(results); + Assert.assertEquals(1, results.size()); + Map<String, ColumnAnalysis> columns = results.get(0).getColumns(); + Assert.assertNotNull(columns.get("histogram")); + Assert.assertEquals("spectatorHistogramTimer", columns.get("histogram").getType()); + } + + @Test + public void testMetadataQueryDistribution() throws Exception + { + File segmentDir = tempFolder.newFolder(); + helper.createIndex( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + segmentDir, + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + true + ); + + ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + module.getJacksonModules().forEach(mod -> mapper.registerModule(mod)); + IndexIO indexIO = new IndexIO( + mapper, + new ColumnConfig() { } + ); + + QueryableIndex index = indexIO.loadIndex(segmentDir); + + SegmentId segmentId = SegmentId.dummy("segmentId"); + QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner( + METADATA_QR_FACTORY, + segmentId, + new QueryableIndexSegment(index, segmentId), + null + ); + + SegmentMetadataQuery segmentMetadataQuery = Druids.newSegmentMetadataQueryBuilder() + .dataSource("test_datasource") + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .merge(true) + .build(); + List<SegmentAnalysis> results = runner.run(QueryPlus.wrap(segmentMetadataQuery)).toList(); + System.out.println(results); + Assert.assertEquals(1, results.size()); + Map<String, ColumnAnalysis> columns = results.get(0).getColumns(); + Assert.assertNotNull(columns.get("histogram")); + Assert.assertEquals("spectatorHistogramDistribution", columns.get("histogram").getType()); Review Comment: ## Deprecated method or constructor invocation Invoking [ColumnAnalysis.getType](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/5966) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java: ########## @@ -0,0 +1,733 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; +import org.apache.druid.query.QueryPlus; +import org.apache.druid.query.QueryRunner; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.apache.druid.query.Result; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.metadata.SegmentMetadataQueryConfig; +import org.apache.druid.query.metadata.SegmentMetadataQueryQueryToolChest; +import org.apache.druid.query.metadata.SegmentMetadataQueryRunnerFactory; +import org.apache.druid.query.metadata.metadata.ColumnAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.timeline.SegmentId; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class SpectatorHistogramAggregatorTest extends InitializedNullHandlingTest +{ + public static final String INPUT_DATA_PARSE_SPEC = String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"cost\"]", + " }", + "}" + ); + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + private static final SegmentMetadataQueryRunnerFactory METADATA_QR_FACTORY = new SegmentMetadataQueryRunnerFactory( + new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ); + private static final Map<String, SpectatorHistogram> EXPECTED_HISTOGRAMS = new HashMap<>(); + + static { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 1L); + EXPECTED_HISTOGRAMS.put("A", histogram); + + histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(30 + 40 + 40 + 40 + 50 + 50), 1L); + EXPECTED_HISTOGRAMS.put("B", histogram); + + histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(50 + 20000), 1L); + EXPECTED_HISTOGRAMS.put("C", histogram); + } + + private final AggregationTestHelper helper; + private final AggregationTestHelper timeSeriesHelper; + + public SpectatorHistogramAggregatorTest(final GroupByQueryConfig config) + { + SpectatorHistogramModule.registerSerde(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + module.getJacksonModules(), config, tempFolder); + timeSeriesHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper( + module.getJacksonModules(), + tempFolder + ); + } + + @Parameterized.Parameters(name = "{0}") + public static Collection<?> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { + constructors.add(new Object[]{config}); + } + return constructors; + } + + // this is to test Json properties and equals + @Test + public void serializeDeserializeFactoryWithFieldName() throws Exception + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + new SpectatorHistogramModule().getJacksonModules().forEach(objectMapper::registerModule); + SpectatorHistogramAggregatorFactory factory = new SpectatorHistogramAggregatorFactory( + "name", + "filedName", + AggregatorUtil.SPECTATOR_HISTOGRAM_CACHE_TYPE_ID + ); + AggregatorFactory other = objectMapper.readValue( + objectMapper.writeValueAsString(factory), + AggregatorFactory.class + ); + + Assert.assertEquals(factory, other); + } + + @Test + public void testBuildingHistogramQueryTime() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"longSum\", \"name\": \"cost_sum\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [\"product\"],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"cost_histogram\", \"fieldName\": " + + "\"cost_sum\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List<ResultRow> results = seq.toList(); + assertResultsMatch(results, 0, "A"); + assertResultsMatch(results, 1, "B"); + assertResultsMatch(results, 2, "C"); + } + + @Test + public void testBuildingAndMergingHistograms() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testBuildingAndMergingHistogramsTimeseriesQuery() throws Exception + { + Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + Sequence<Result<TimeseriesResultValue>> seq = (Sequence<Result<TimeseriesResultValue>>) rawseq; + List<Result<TimeseriesResultValue>> results = seq.toList(); + Assert.assertEquals(1, results.size()); + SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); + Assert.assertEquals(expected, value); + } + + @Test + public void testBuildingAndMergingGroupbyHistograms() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [\"product\"],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(6, results.size()); + + SpectatorHistogram expectedA = new SpectatorHistogram(); + expectedA.add(PercentileBuckets.indexOf(10), 1L); + Assert.assertEquals(expectedA, results.get(0).get(1)); + + SpectatorHistogram expectedB = new SpectatorHistogram(); + expectedB.add(PercentileBuckets.indexOf(30), 1L); + expectedB.add(PercentileBuckets.indexOf(40), 3L); + expectedB.add(PercentileBuckets.indexOf(50), 2L); + Assert.assertEquals(expectedB, results.get(1).get(1)); + + SpectatorHistogram expectedC = new SpectatorHistogram(); + expectedC.add(PercentileBuckets.indexOf(50), 1L); + expectedC.add(PercentileBuckets.indexOf(20000), 1L); + Assert.assertEquals(expectedC, results.get(2).get(1)); + + Assert.assertNull(results.get(3).get(1)); + Assert.assertNull(results.get(4).get(1)); + Assert.assertNull(results.get(5).get(1)); + } + + @Test + public void testBuildingAndCountingHistograms() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " + + "\"histogram\"},", + " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check longSum + Assert.assertEquals(9L, results.get(0).get(0)); + // Check doubleSum + Assert.assertEquals(9.0, (Double) results.get(0).get(1), 0.001); + } + + @Test + public void testBuildingAndCountingHistogramsWithNullFilter() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " + + "\"histogram\"},", + " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"],", + " \"filter\": {\n", + " \"fields\": [\n", + " {\n", + " \"field\": {\n", + " \"dimension\": \"histogram\",\n", + " \"value\": \"0\",\n", + " \"type\": \"selector\"\n", + " },\n", + " \"type\": \"not\"\n", + " },\n", + " {\n", + " \"field\": {\n", + " \"dimension\": \"histogram\",\n", + " \"value\": \"\",\n", + " \"type\": \"selector\"\n", + " },\n", + " \"type\": \"not\"\n", + " }\n", + " ],\n", + " \"type\": \"and\"\n", + " }", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check longSum + Assert.assertEquals(9L, results.get(0).get(0)); + // Check doubleSum + Assert.assertEquals(9.0, (Double) results.get(0).get(1), 0.001); + } + + @Test + public void testIngestAsHistogramDistribution() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testIngestHistogramsTimer() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testIngestingPreaggregatedHistograms() throws Exception + { + Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("pre_agg_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + Sequence<Result<TimeseriesResultValue>> seq = (Sequence<Result<TimeseriesResultValue>>) rawseq; + List<Result<TimeseriesResultValue>> results = seq.toList(); + Assert.assertEquals(1, results.size()); + SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); + Assert.assertEquals(expected, value); + } + + @Test + public void testMetadataQueryTimer() throws Exception + { + File segmentDir = tempFolder.newFolder(); + helper.createIndex( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + segmentDir, + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + true + ); + + ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + module.getJacksonModules().forEach(mod -> mapper.registerModule(mod)); + IndexIO indexIO = new IndexIO( + mapper, + new ColumnConfig() {} + ); + + QueryableIndex index = indexIO.loadIndex(segmentDir); + + SegmentId segmentId = SegmentId.dummy("segmentId"); + QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner( + METADATA_QR_FACTORY, + segmentId, + new QueryableIndexSegment(index, segmentId), + null + ); + + SegmentMetadataQuery segmentMetadataQuery = Druids.newSegmentMetadataQueryBuilder() + .dataSource("test_datasource") + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .merge(true) + .build(); + List<SegmentAnalysis> results = runner.run(QueryPlus.wrap(segmentMetadataQuery)).toList(); + System.out.println(results); + Assert.assertEquals(1, results.size()); + Map<String, ColumnAnalysis> columns = results.get(0).getColumns(); + Assert.assertNotNull(columns.get("histogram")); + Assert.assertEquals("spectatorHistogramTimer", columns.get("histogram").getType()); Review Comment: ## Deprecated method or constructor invocation Invoking [ColumnAnalysis.getType](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/5965) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java: ########## @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramTest +{ + @Test + public void testToBytesSmallValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.insert(10); + histogram.insert(30); + histogram.insert(40); + histogram.insert(40); + histogram.insert(40); + histogram.insert(50); + histogram.insert(50); + // Check the full range of bucket IDs still work + long bigValue = PercentileBuckets.get(270); + histogram.insert(bigValue); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 8, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = 0; + Assert.assertEquals("Should compact small values within key bytes", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(3L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(2L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(bigValue))); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 8, deserialized.getSum()); + } + + @Test + public void testToBytesSmallishValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 64L); + histogram.add(PercentileBuckets.indexOf(30), 127L); + histogram.add(PercentileBuckets.indexOf(40), 111L); + histogram.add(PercentileBuckets.indexOf(50), 99L); + histogram.add(270, 100L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 501, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Byte.BYTES; + Assert.assertEquals("Should compact small values to a byte", 5 * (keySize + valSize), bytes.length); Review Comment: ## Result of multiplication cast to wider type Potential overflow in [int multiplication](1) before it is converted to long by use in an invocation context. [Show more details](https://github.com/apache/druid/security/code-scanning/5968) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java: ########## @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramTest +{ + @Test + public void testToBytesSmallValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.insert(10); + histogram.insert(30); + histogram.insert(40); + histogram.insert(40); + histogram.insert(40); + histogram.insert(50); + histogram.insert(50); + // Check the full range of bucket IDs still work + long bigValue = PercentileBuckets.get(270); + histogram.insert(bigValue); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 8, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = 0; + Assert.assertEquals("Should compact small values within key bytes", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(3L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(2L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(bigValue))); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 8, deserialized.getSum()); + } + + @Test + public void testToBytesSmallishValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 64L); + histogram.add(PercentileBuckets.indexOf(30), 127L); + histogram.add(PercentileBuckets.indexOf(40), 111L); + histogram.add(PercentileBuckets.indexOf(50), 99L); + histogram.add(270, 100L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 501, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Byte.BYTES; + Assert.assertEquals("Should compact small values to a byte", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(64L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(127L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(111L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(99L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(100L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 501, deserialized.getSum()); + } + + @Test + public void testToBytesMedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 512L); + histogram.add(PercentileBuckets.indexOf(30), 1024L); + histogram.add(PercentileBuckets.indexOf(40), 2048L); + histogram.add(PercentileBuckets.indexOf(50), 4096L); + histogram.add(270, 8192L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Short.BYTES; + Assert.assertEquals("Should compact medium values to short", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(512L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1024L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(2048L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(4096L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(8192L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, deserialized.getSum()); + } + + @Test + public void testToBytesLargerValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 100000L); + histogram.add(PercentileBuckets.indexOf(30), 200000L); + histogram.add(PercentileBuckets.indexOf(40), 500000L); + histogram.add(PercentileBuckets.indexOf(50), 10000000L); + histogram.add(270, 50000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 60800000, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Integer.BYTES; + Assert.assertEquals("Should compact larger values to integer", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(100000L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(200000L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(500000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(10000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(50000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 60800000, deserialized.getSum()); + } + + @Test + public void testToBytesBiggestValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 10000000000L); + histogram.add(PercentileBuckets.indexOf(30), 20000000000L); + histogram.add(PercentileBuckets.indexOf(40), 50000000000L); + histogram.add(PercentileBuckets.indexOf(50), 100000000000L); + histogram.add(270, 5000000000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 5180000000000L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Long.BYTES; + Assert.assertEquals("Should not compact larger values", 5 * (keySize + valSize), bytes.length); Review Comment: ## Result of multiplication cast to wider type Potential overflow in [int multiplication](1) before it is converted to long by use in an invocation context. [Show more details](https://github.com/apache/druid/security/code-scanning/5971) ########## extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java: ########## @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.netflix.spectator.api.histogram.PercentileBuckets; +import it.unimi.dsi.fastutil.shorts.Short2LongMap; +import it.unimi.dsi.fastutil.shorts.Short2LongMaps; +import it.unimi.dsi.fastutil.shorts.Short2LongOpenHashMap; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.jackson.JacksonUtils; +import org.apache.druid.java.util.common.parsers.ParseException; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +// Since queries don't come from SpectatorHistogramAggregator in the case of +// using longSum or doubleSum aggregations. They come from LongSumBufferAggregator. +// Therefore, we extended Number here. +// This will prevent class casting exceptions if trying to query with sum rather +// than explicitly as a SpectatorHistogram +// +// The SpectatorHistogram is a Number. That number is of intValue(), +// which is the count of the number of events in the histogram +// (adding up the counts across all buckets). +// +// There are a few useful aggregators, which as Druid Native Queries use: +// type: "longSum" - Aggregates and returns the number of events in the histogram. +// i.e. the sum of all bucket counts. +// type: "spectatorHistogramDistribution" - Aggregates and returns a map (bucketIndex -> bucketCount) +// representing a SpectatorHistogram. The represented data is a distribution. +// type: "spectatorHistogramTimer" - Aggregates and returns a map (bucketIndex -> bucketCount) +// representing a SpectatorHistogram. The represented data is measuring time. +public class SpectatorHistogram extends Number +{ + private static final int MAX_ENTRY_BYTES = Short.BYTES + Long.BYTES; + private static final int LOW_COUNT_FLAG = 0x0200; + private static final int BYTE_VALUE = 0x8000; + private static final int SHORT_VALUE = 0x4000; + private static final int INT_VALUE = 0xC000; + private static final int VALUE_SIZE_MASK = 0xFC00; + private static final int KEY_MASK = 0x01FF; + + private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); + + // Values are packed into few bytes depending on the size of the counts + // The bucket index falls in the range 0-276, so we need 9 bits for the bucket index. + // Counts can range from 1 to Long.MAX_VALUE, so we need 1 to 64 bits for the value. + // To optimize storage, we use the remaining top 7 bits of the bucket index short to + // encode the storage type for the count value. + // AAbb bbYx xxxx xxxx + // | +-- 9 bits - The bucket index + // +------------- 1 bit - Low-count flag, set if count <= 63 + // ++++ ++-------------- 6 bits - If low-count flag is set, + // The count value, zero extra bytes used. + // If low-count flag is not set, + // The value length indicator as encoded below + // ++------------------- 2 bits - 00 = 8 bytes used for value + // 10 = 1 byte used for value + // 01 = 2 bytes used for value + // 11 = 4 bytes used for value + // + // Example: + // ------------------------------------------------------------------------------------------ + // Consider the histogram: [10, 30, 40x3, 50x2, 100x256] + // That is there is one value of 10, and 3 values of 40, etc. As shown in the table below: + // + // Bucket Index | Bucket Range | Bucket Count + // 10 | [10,11) | 1 + // 17 | [26,31) | 1 + // 19 | [36,41) | 3 + // 21 | [46,51) | 2 + // 25 | [85,106) | 256 + // + // See com.netflix.spectator.api.histogram.PercentileBuckets + // for an explaination of how the bucket index is assigned + // to each of the values: (10, 17, 19, 21, 25). + // + // Based on the specification above the histogram is serialized into a + // byte array to minimize storage size: + // In Base 10: [64, 25, 1, 0, 6, 10, 6, 17, 14, 19, 10, 21] + // In Binary: [01000000, 00011001, 00000001, 00000000, 00000110, 00001010, + // 00000110, 00010001, 00001110, 00010011, 00001010, 00010101] + // + // Each groups of bits (which varies in length), represent a histogram bucket index and count + // 01000000000110010000000100000000 + // 01 - Since the low count bit is NOT set, leading 2 bits 01 indicates that the bucket count + // value is encoded in 2 bytes. + // 0000 - Since the low count bit is Not set these bits are unused, the bucket count will + // be encoded in an additional two bytes. + // 0 - Low count bit is NOT set + // 000011001 - These 9 bits represent the bucket index of 25 + // 0000000100000000 - These 16 bits represent the bucket count of 256 + // + // 0000011000001010 + // 000001 - Low count bit IS set, so these 6-bits represent a bucket count of 1 + // 1 - Low count bit IS set + // 000001010 - These 9 bits represent the bucket index of 10 + // + // 0000011000010001 + // 000001 - Bucket count of 1 + // 1 - Low count bit IS set + // 000010001 - Bucket index of 17 + // + // 0000111000010011 + // 000011 - Bucket count of 3 + // 1 - Low count bit IS set + // 000010011 - Bucket index of 19 + // + // 0000101000010101 + // 000010 - Bucket count of 2 + // 1 - Low count bit IS set + // 000010101 - Bucket index of 21 + // ------------------------------------------------------------------------------------------ + private Short2LongOpenHashMap backingMap; + + // The sum of counts in the histogram. + // These are accumulated when an entry is added, or when another histogram is merged into this one. + private long sumOfCounts = 0; + + static int getMaxIntermdiateHistogramSize() + { + return PercentileBuckets.length() * MAX_ENTRY_BYTES; + } + + @Nullable + static SpectatorHistogram deserialize(Object serializedHistogram) + { + if (serializedHistogram == null) { + return null; + } + if (serializedHistogram instanceof byte[]) { + return fromByteBuffer(ByteBuffer.wrap((byte[]) serializedHistogram)); + } + if (serializedHistogram instanceof SpectatorHistogram) { + return (SpectatorHistogram) serializedHistogram; + } + if (serializedHistogram instanceof String) { + // Try parse as JSON into HashMap + try { + HashMap<String, Long> map = JSON_MAPPER.readerFor(HashMap.class).readValue((String) serializedHistogram); + SpectatorHistogram histogram = new SpectatorHistogram(); + for (Map.Entry<String, Long> entry : map.entrySet()) { + histogram.add(entry.getKey(), entry.getValue()); + } + return histogram; + } + catch (JsonProcessingException e) { + throw new ParseException((String) serializedHistogram, e, "String cannot be deserialized as JSON to a Spectator Histogram"); + } + } + if (serializedHistogram instanceof HashMap) { + SpectatorHistogram histogram = new SpectatorHistogram(); + for (Map.Entry<?, ?> entry : ((HashMap<?, ?>) serializedHistogram).entrySet()) { + histogram.add(entry.getKey(), (Number) entry.getValue()); + } + return histogram; + } + throw new ParseException( + null, + "Object cannot be deserialized to a Spectator Histogram " + + serializedHistogram.getClass() + ); + } + + @Nullable + static SpectatorHistogram fromByteBuffer(ByteBuffer buffer) + { + if (buffer == null || !buffer.hasRemaining()) { + return null; + } + SpectatorHistogram histogram = new SpectatorHistogram(); + while (buffer.hasRemaining()) { + short key = buffer.getShort(); + short idx = (short) (key & KEY_MASK); + long val; + if ((key & LOW_COUNT_FLAG) == LOW_COUNT_FLAG) { + // Value/count is encoded in the top 6 bits of the short + val = (key & VALUE_SIZE_MASK) >>> 10; + } else { + switch (key & VALUE_SIZE_MASK) { + case BYTE_VALUE: + val = buffer.get() & 0xFF; + break; + + case SHORT_VALUE: + val = buffer.getShort() & 0xFFFF; + break; + + case INT_VALUE: + val = buffer.getInt() & 0xFFFFFFFFL; + break; + + default: + val = buffer.getLong(); + break; + } + } + + histogram.add(idx, val); + } + if (histogram.isEmpty()) { + return null; + } + return histogram; + } + + private Short2LongOpenHashMap writableMap() + { + if (backingMap == null) { + backingMap = new Short2LongOpenHashMap(); + } + return backingMap; + } + + private Short2LongMap readableMap() + { + if (isEmpty()) { + return Short2LongMaps.EMPTY_MAP; + } + return backingMap; + } + + @Nullable + byte[] toBytes() + { + if (isEmpty()) { + return null; + } + ByteBuffer buffer = ByteBuffer.allocate(MAX_ENTRY_BYTES * size()); + for (Short2LongMap.Entry e : Short2LongMaps.fastIterable(readableMap())) { + short key = e.getShortKey(); + long value = e.getLongValue(); + if (value <= 0x3F) { + // Value/count is encoded in the top 6 bits of the key bytes + buffer.putShort((short) ((key | LOW_COUNT_FLAG) | ((int) ((value << 10) & VALUE_SIZE_MASK)))); + } else if (value <= 0xFF) { + buffer.putShort((short) (key | BYTE_VALUE)); + buffer.put((byte) value); + } else if (value <= 0xFFFF) { + buffer.putShort((short) (key | SHORT_VALUE)); + buffer.putShort((short) value); + } else if (value <= 0xFFFFFFFFL) { + buffer.putShort((short) (key | INT_VALUE)); + buffer.putInt((int) value); + } else { + buffer.putShort(key); + buffer.putLong(value); + } + } + return Arrays.copyOf(buffer.array(), buffer.position()); + } + + void insert(Number num) + { + this.add(PercentileBuckets.indexOf(num.longValue()), 1L); + } + + void merge(SpectatorHistogram source) + { + if (source == null) { + return; + } + Short2LongOpenHashMap writableMap = writableMap(); + for (Short2LongMap.Entry entry : Short2LongMaps.fastIterable(source.readableMap())) { + writableMap.addTo(entry.getShortKey(), entry.getLongValue()); + this.sumOfCounts += entry.getLongValue(); + } + } + + // Exposed for testing + void add(int bucket, long count) + { + if (bucket >= PercentileBuckets.length() || bucket < 0) { + throw new IAE("Bucket index out of range (0, " + PercentileBuckets.length() + ")"); + } + writableMap().addTo((short) bucket, count); + this.sumOfCounts += count; + } + + private void add(Object key, Number value) + { + if (key instanceof String) { + this.add(Integer.parseInt((String) key), value.longValue()); Review Comment: ## Missing catch of NumberFormatException Potential uncaught 'java.lang.NumberFormatException'. [Show more details](https://github.com/apache/druid/security/code-scanning/5963) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java: ########## @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramTest +{ + @Test + public void testToBytesSmallValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.insert(10); + histogram.insert(30); + histogram.insert(40); + histogram.insert(40); + histogram.insert(40); + histogram.insert(50); + histogram.insert(50); + // Check the full range of bucket IDs still work + long bigValue = PercentileBuckets.get(270); + histogram.insert(bigValue); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 8, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = 0; + Assert.assertEquals("Should compact small values within key bytes", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(3L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(2L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(bigValue))); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 8, deserialized.getSum()); + } + + @Test + public void testToBytesSmallishValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 64L); + histogram.add(PercentileBuckets.indexOf(30), 127L); + histogram.add(PercentileBuckets.indexOf(40), 111L); + histogram.add(PercentileBuckets.indexOf(50), 99L); + histogram.add(270, 100L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 501, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Byte.BYTES; + Assert.assertEquals("Should compact small values to a byte", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(64L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(127L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(111L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(99L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(100L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 501, deserialized.getSum()); + } + + @Test + public void testToBytesMedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 512L); + histogram.add(PercentileBuckets.indexOf(30), 1024L); + histogram.add(PercentileBuckets.indexOf(40), 2048L); + histogram.add(PercentileBuckets.indexOf(50), 4096L); + histogram.add(270, 8192L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Short.BYTES; + Assert.assertEquals("Should compact medium values to short", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(512L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1024L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(2048L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(4096L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(8192L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, deserialized.getSum()); + } + + @Test + public void testToBytesLargerValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 100000L); + histogram.add(PercentileBuckets.indexOf(30), 200000L); + histogram.add(PercentileBuckets.indexOf(40), 500000L); + histogram.add(PercentileBuckets.indexOf(50), 10000000L); + histogram.add(270, 50000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 60800000, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Integer.BYTES; + Assert.assertEquals("Should compact larger values to integer", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(100000L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(200000L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(500000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(10000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(50000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 60800000, deserialized.getSum()); + } + + @Test + public void testToBytesBiggestValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 10000000000L); + histogram.add(PercentileBuckets.indexOf(30), 20000000000L); + histogram.add(PercentileBuckets.indexOf(40), 50000000000L); + histogram.add(PercentileBuckets.indexOf(50), 100000000000L); + histogram.add(270, 5000000000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 5180000000000L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Long.BYTES; + Assert.assertEquals("Should not compact larger values", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(10000000000L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(20000000000L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(50000000000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(100000000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(5000000000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 5180000000000L, deserialized.getSum()); + } + + @Test + public void testToBytesMixedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 1L); + histogram.add(PercentileBuckets.indexOf(30), 300L); + histogram.add(PercentileBuckets.indexOf(40), 200000L); + histogram.add(PercentileBuckets.indexOf(50), 100000000000L); + histogram.add(270, 5000000000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 5100000200301L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + Assert.assertEquals("Should not compact larger values", (5 * keySize) + 0 + 2 + 4 + 8 + 8, bytes.length); Review Comment: ## Result of multiplication cast to wider type Potential overflow in [int multiplication](1) before it is converted to long by use in an invocation context. [Show more details](https://github.com/apache/druid/security/code-scanning/5972) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java: ########## @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramTest +{ + @Test + public void testToBytesSmallValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.insert(10); + histogram.insert(30); + histogram.insert(40); + histogram.insert(40); + histogram.insert(40); + histogram.insert(50); + histogram.insert(50); + // Check the full range of bucket IDs still work + long bigValue = PercentileBuckets.get(270); + histogram.insert(bigValue); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 8, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = 0; + Assert.assertEquals("Should compact small values within key bytes", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(3L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(2L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(bigValue))); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 8, deserialized.getSum()); + } + + @Test + public void testToBytesSmallishValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 64L); + histogram.add(PercentileBuckets.indexOf(30), 127L); + histogram.add(PercentileBuckets.indexOf(40), 111L); + histogram.add(PercentileBuckets.indexOf(50), 99L); + histogram.add(270, 100L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 501, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Byte.BYTES; + Assert.assertEquals("Should compact small values to a byte", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(64L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(127L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(111L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(99L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(100L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 501, deserialized.getSum()); + } + + @Test + public void testToBytesMedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 512L); + histogram.add(PercentileBuckets.indexOf(30), 1024L); + histogram.add(PercentileBuckets.indexOf(40), 2048L); + histogram.add(PercentileBuckets.indexOf(50), 4096L); + histogram.add(270, 8192L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Short.BYTES; + Assert.assertEquals("Should compact medium values to short", 5 * (keySize + valSize), bytes.length); Review Comment: ## Result of multiplication cast to wider type Potential overflow in [int multiplication](1) before it is converted to long by use in an invocation context. [Show more details](https://github.com/apache/druid/security/code-scanning/5969) ########## extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java: ########## @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramTest +{ + @Test + public void testToBytesSmallValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.insert(10); + histogram.insert(30); + histogram.insert(40); + histogram.insert(40); + histogram.insert(40); + histogram.insert(50); + histogram.insert(50); + // Check the full range of bucket IDs still work + long bigValue = PercentileBuckets.get(270); + histogram.insert(bigValue); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 8, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = 0; + Assert.assertEquals("Should compact small values within key bytes", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(3L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(2L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(bigValue))); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 8, deserialized.getSum()); + } + + @Test + public void testToBytesSmallishValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 64L); + histogram.add(PercentileBuckets.indexOf(30), 127L); + histogram.add(PercentileBuckets.indexOf(40), 111L); + histogram.add(PercentileBuckets.indexOf(50), 99L); + histogram.add(270, 100L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 501, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Byte.BYTES; + Assert.assertEquals("Should compact small values to a byte", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(64L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(127L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(111L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(99L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(100L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 501, deserialized.getSum()); + } + + @Test + public void testToBytesMedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 512L); + histogram.add(PercentileBuckets.indexOf(30), 1024L); + histogram.add(PercentileBuckets.indexOf(40), 2048L); + histogram.add(PercentileBuckets.indexOf(50), 4096L); + histogram.add(270, 8192L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Short.BYTES; + Assert.assertEquals("Should compact medium values to short", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(512L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1024L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(2048L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(4096L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(8192L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 15872, deserialized.getSum()); + } + + @Test + public void testToBytesLargerValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 100000L); + histogram.add(PercentileBuckets.indexOf(30), 200000L); + histogram.add(PercentileBuckets.indexOf(40), 500000L); + histogram.add(PercentileBuckets.indexOf(50), 10000000L); + histogram.add(270, 50000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 60800000, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Integer.BYTES; + Assert.assertEquals("Should compact larger values to integer", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(100000L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(200000L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(500000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(10000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(50000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 60800000, deserialized.getSum()); + } + + @Test + public void testToBytesBiggestValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 10000000000L); + histogram.add(PercentileBuckets.indexOf(30), 20000000000L); + histogram.add(PercentileBuckets.indexOf(40), 50000000000L); + histogram.add(PercentileBuckets.indexOf(50), 100000000000L); + histogram.add(270, 5000000000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 5180000000000L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Long.BYTES; + Assert.assertEquals("Should not compact larger values", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(10000000000L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(20000000000L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(50000000000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(100000000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(5000000000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 5180000000000L, deserialized.getSum()); + } + + @Test + public void testToBytesMixedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 1L); + histogram.add(PercentileBuckets.indexOf(30), 300L); + histogram.add(PercentileBuckets.indexOf(40), 200000L); + histogram.add(PercentileBuckets.indexOf(50), 100000000000L); + histogram.add(270, 5000000000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 5100000200301L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + Assert.assertEquals("Should not compact larger values", (5 * keySize) + 0 + 2 + 4 + 8 + 8, bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(300L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(200000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(100000000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(5000000000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number entries", 5100000200301L, deserialized.getSum()); + } + + @Test + public void testToBytesBoundaryValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(6, 63L); + histogram.add(7, 64L); + histogram.add(8, 255L); + histogram.add(9, 256L); + histogram.add(16, 65535L); + histogram.add(17, 65536L); + histogram.add(32, 4294967295L); + histogram.add(33, 4294967296L); + + Assert.assertEquals("Should have size matching number of buckets", 8, histogram.size()); + Assert.assertEquals("Should have sum matching number entries", 8590066300L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + Assert.assertEquals("Should compact", (8 * keySize) + 0 + 1 + 1 + 2 + 2 + 4 + 4 + 8, bytes.length); Review Comment: ## Result of multiplication cast to wider type Potential overflow in [int multiplication](1) before it is converted to long by use in an invocation context. [Show more details](https://github.com/apache/druid/security/code-scanning/5973) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
