This is an automated email from the ASF dual-hosted git repository.
sajjad pushed a commit to branch hotfix-theta-sketch
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/hotfix-theta-sketch by this
push:
new e54888f597 Backwards compatible theta sketch aggregation (#12288)
e54888f597 is described below
commit e54888f597105216666ee3b0f4d28596434e9652
Author: David Cromberge <[email protected]>
AuthorDate: Sun Jan 21 22:42:08 2024 +0000
Backwards compatible theta sketch aggregation (#12288)
* Backwards compatible theta sketch aggregation
Servers running Pinot versions that predate the ThetaSketchAccumulator
return Sketches directly to the merge function. This change ensures
backwards compatibility between the two.
* Add Theta Sketch distinct count queries to compatibility check queries
---
.../config/queries/feature-test-1-sql.queries | 6 +++---
.../config/queries/feature-test-2-sql-realtime.queries | 4 ++--
.../query-results/feature-test-1-rest-sql.results | 6 +++---
.../query-results/feature-test-2-sql-realtime.results | 4 ++--
.../DistinctCountThetaSketchAggregationFunction.java | 18 ++++++++++++++++--
5 files changed, 26 insertions(+), 12 deletions(-)
diff --git
a/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries
b/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries
index 37a6120a5d..38b8484243 100644
---
a/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries
+++
b/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries
@@ -22,7 +22,7 @@ SELECT count(*) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__
SELECT sum(intMetric1), sumMV(intDimMV1), min(intMetric1), minMV(intDimMV2),
max(longDimSV1), maxMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__
SELECT count(longDimSV1), countMV(intDimMV1), avg(floatMetric1),
avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__
SELECT percentile(longDimSV1, 80), percentileMV(intDimMV1, 90),
percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90),
percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__
-SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1),
distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest1
WHERE generationNumber = __GENERATION_NUMBER__
+SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1),
distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1),
distinctCountThetaSketch(longDimSV1) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__
# Selection
SELECT longDimSV2, stringDimSV1, textDim1, bytesDimSV1 FROM FeatureTest1 WHERE
generationNumber = __GENERATION_NUMBER__ ORDER BY longDimSV2 LIMIT 9
@@ -46,14 +46,14 @@ SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest1
WHERE generationNumber
SELECT longDimSV1, intDimMV1, sum(intMetric1), sumMV(intDimMV1),
min(intMetric1), minMV(intDimMV2), max(longDimSV1), maxMV(intDimMV1) FROM
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY
longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1),
avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1),
minMaxRangeMV(intDimMV2) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80),
percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80),
percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80),
percentileTDigestMV(intDimMV1, 90) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
-SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1),
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1),
distinctCountHLLMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
+SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1),
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1),
distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY
longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
# Selection & Filtering & Grouping on Aggregation
SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest1 WHERE
generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND
longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN
('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1,
intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5
SELECT longDimSV1, intDimMV1, sum(intMetric1), sumMV(intDimMV1),
min(intMetric1), minMV(intDimMV2), max(longDimSV1), maxMV(intDimMV1) FROM
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1
!= 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND
stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY
longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5
SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1),
avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1),
minMaxRangeMV(intDimMV2) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1
LIMIT 5
SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80),
percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80),
percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80),
percentileTDigestMV(intDimMV1, 90) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, in [...]
-SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1),
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1),
distinctCountHLLMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber =
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1
LIMIT 5
+SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1),
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1),
distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1
!= 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND
stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY
longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5
# Transformation Functions
SELECT add(longDimSV1, sub(longDimSV2, 3)), mod(intMetric1, 10),
div(doubleMetric1, mult(floatMetric1, 5)) FROM FeatureTest1 WHERE
generationNumber = __GENERATION_NUMBER__ ORDER BY add(longDimSV1,
sub(longDimSV2, 3)) DESC, mod(intMetric1, 10)
diff --git
a/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries
b/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries
index da9c43d7ad..3627205534 100644
---
a/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries
+++
b/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries
@@ -32,7 +32,7 @@ SELECT sum(intMetric1), sumMV(intDimMV1), min(intMetric1),
minMV(intDimMV2), max
SELECT count(longDimSV1), countMV(intDimMV1), avg(floatMetric1),
avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM
FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__
SELECT percentile(longDimSV1, 80), percentileMV(intDimMV1, 90),
percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90),
percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM
FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__
SELECT percentile(longDimSV1, 80.01), percentileMV(intDimMV1, 99.99),
percentileEst(longDimSV1, 80.01), percentileEstMV(intDimMV1, 99.99),
percentileTDigest(longDimSV1, 80.01), percentileTDigestMV(intDimMV1, 99.99)
FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__
-SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1),
distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest2
WHERE generationNumber = __GENERATION_NUMBER__
+SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1),
distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1),
distinctCountThetaSketch(longDimSV1) FROM FeatureTest2 WHERE generationNumber =
__GENERATION_NUMBER__
# Selection & Filtering & Grouping on Aggregation
SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest2 WHERE
generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND
longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN
('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1,
intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 20
@@ -40,7 +40,7 @@ SELECT longDimSV1, intDimMV1, sum(intMetric1),
sumMV(intDimMV1), min(intMetric1)
SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1),
avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1),
minMaxRangeMV(intDimMV2) FROM FeatureTest2 WHERE generationNumber =
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1
LIMIT 20
SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80),
percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80),
percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80),
percentileTDigestMV(intDimMV1, 90) FROM FeatureTest2 WHERE generationNumber =
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, in [...]
SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80.01),
percentileMV(intDimMV1, 99.99), percentileEst(longDimSV1, 80.01),
percentileEstMV(intDimMV1, 99.99), percentileTDigest(longDimSV1, 80.01),
percentileTDigestMV(intDimMV1, 99.99) FROM FeatureTest2 WHERE generationNumber
= __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10
AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND
intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER [...]
-SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1),
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1),
distinctCountHLLMV(intDimMV1) FROM FeatureTest2 WHERE generationNumber =
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1
LIMIT 20
+SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1),
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1),
distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM
FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1
!= 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND
stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY
longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 20
# Transformation Functions
SELECT DISTINCT add(longDimSV1, sub(longDimSV2, 3)), mod(intMetric1, 10),
div(doubleMetric1, mult(floatMetric1, 5)) FROM FeatureTest2 WHERE
generationNumber = __GENERATION_NUMBER__ ORDER BY add(longDimSV1,
sub(longDimSV2, 3)) DESC, mod(intMetric1, 10), div(doubleMetric1,
mult(floatMetric1, 5))
diff --git
a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results
b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results
index 83ae247116..aad84fc46e 100644
---
a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results
+++
b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results
@@ -22,7 +22,7 @@
{"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[4.294967536E9,-2.147479976E9,0.0,6.0,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":40,"numGroupsLi
[...]
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[10,19,114.09000263214111,1516.9,250.00000000000003,6656.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFil
[...]
{"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["percentile(longDimSV1,
80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1,
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1,
80.0)","percentiletdigestmv(intDimMV1,
90.0)"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegme
[...]
-{"resultTable":{"dataSchema":{"columnDataTypes":["INT","INT","LONG","LONG"],"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[6,8,6,8]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":20,"numGroupsLimitReached":false,"totalDocs":10,"timeUsedMs":6,"segmentStati
[...]
+{"resultTable":{"dataSchema":{"columnDataTypes":["INT","INT","LONG","LONG","LONG"],"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[6,8,6,8,6]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":20,"numGroupsLimitReached":f
[...]
# Selection
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","STRING","STRING","BYTES"],"columnNames":["longDimSV2","stringDimSV1","textDim1","bytesDimSV1"]},"rows":[[2,"s1-0","Java
C++ Python","4877625602"],[2,"s1-0","Java C++
Python","01a0bc"],[21,"s1-2","Java C++ golang","13225573e3f5"],[21,"s1-2","Java
C++ golang","deadbeef"],[22,"s1-4","Java C++
golang","deed0507"],[32,"s1-5","golang shell bash",""],[6777,"s1-7","golang
Java","d54d0507"],[7621,"s1-6","C++ golang python","deed0507"],[7621 [...]
@@ -42,14 +42,14 @@
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,0.0,-2.147483648E9,0.0,22.0,-9.223372036854776E18,-2.147483648E9],[1,3,20.0,14.0,10.0,6.0,1.0,4.0],[1,4,20.0,14.0,10.0,6.0,1.0,4.0],[11,42,20.0,148.0,10.0,62.0,11.0,42.0],[11,32,20
[...]
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[-9223372036854775808,-2147483648,1,1,0.0,57.0,0.0,70.0],[1,3,2,4,12.100000381469727,6.5,0.0,1.0],[1,4,2,4,12.100000381469727,6.5,0.0,1.0],[11,42,2,4,22.100000381469727,67.0,0.0,10.0],[11,32,2,4,22.1000
[...]
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1,
80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1,
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1,
80.0)","percentiletdigestmv(intDimMV1,
90.0)"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720368547
[...]
-{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[1,3,1,2,1,2],[1,4,1,2,1,2],[11,42,1,2,1,2],[11,32,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatche
[...]
+{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[1,3,1,2,1,2,1],[1,4,1,2,1,2,1],[11,42,1,2,1,2,1],[11,32,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmen
[...]
# Selection & Filtering & Grouping on Aggregation
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG"],"columnNames":["longDimSV1","intDimMV1","count(*)"]},"rows":[[-9223372036854775808,-2147483648,1],[11,32,2],[11,42,2],[41,42,1],[41,52,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":4,"numEntriesScannedPostFilter":8,"numGroupsLimitReached":false,"totalDocs":10,"timeUsedMs":8,"segmentStatistics":[],"traceInfo":{},
[...]
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,0,-2147483648,0,22,-9223372036854776000,-2147483648],[11,32,20,148,10,62,11,42],[11,42,20,148,10,62,11,42],[41,42,14,94,14,72,41,52],[41,52,14,94,14,72,41,52]]},"exceptions":[],"nu
[...]
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[-9223372036854775808,-2147483648,1,1,0,57,0,70],[11,32,2,4,22.100000381469727,67,0,10],[11,42,2,4,22.100000381469727,67,0,10],[41,42,1,2,24.100000381469727,77,0,10],[41,52,1,2,24.100000381469727,77,0,1
[...]
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1,
80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1,
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1,
80.0)","percentiletdigestmv(intDimMV1,
90.0)"]},"rows":[[-9223372036854775808,-2147483648,-9223372036854775808,-2147483648,-9223372036854775808,-2147483648,-9223372036854776000
[...]
-{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[11,32,1,2,1,2],[11,42,1,2,1,2],[41,42,1,2,1,2],[41,52,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMa
[...]
+{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[11,32,1,2,1,2,1],[11,42,1,2,1,2,1],[41,42,1,2,1,2,1],[41,52,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSe
[...]
# Transformation Functions
{"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE"],"columnNames":["add(longDimSV1,sub(longDimSV2,'3'))","mod(intMetric1,'10')","div(doubleMetric1,mult(floatMetric1,'5'))"]},"rows":[[15229.0,1.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[13540.0,7.0,0.20076306285631254],[60.0,4.0,0.1999999968342762],[29.0,0.0,0.20904977014723267],[29.0,0.0,0.20904977014723267],[0.0,0.0,0.21652891879345226],[0.0,0.0,0.2165289187934522
[...]
diff --git
a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results
b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results
index 47f7a2805c..849020c104 100644
---
a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results
+++
b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results
@@ -29,7 +29,7 @@
{"resultTable":{"dataSchema":{"columnNames":["count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"],"columnDataTypes":["LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"]},"rows":[[66,125,105.11969939145175,1383.2575757575758,250.00000000000003,6656.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesS
[...]
{"resultTable":{"dataSchema":{"columnNames":["percentile(longDimSV1,
80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1,
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1,
80.0)","percentiletdigestmv(intDimMV1,
90.0)"],"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegme
[...]
{"resultTable":{"dataSchema":{"columnNames":["percentile(longDimSV1,
80.01)","percentilemv(intDimMV1, 99.99)","percentileest(longDimSV1,
80.01)","percentileestmv(intDimMV1, 99.99)","percentiletdigest(longDimSV1,
80.01)","percentiletdigestmv(intDimMV1,
99.99)"],"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"nu
[...]
-{"resultTable":{"dataSchema":{"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"],"columnDataTypes":["INT","INT","LONG","LONG"]},"rows":[[6,8,6,8]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesScannedPostFilter":132,"numGroupsLimitReached":false,"totalDocs":66,"timeUsedMs":5,"offlineThre
[...]
+{"resultTable":{"dataSchema":{"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"],"columnDataTypes":["INT","INT","LONG","LONG","LONG"]},"rows":[[6,8,6,8,6]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesScannedPostFilter":132,"numGroupsLimitReached":
[...]
# Selection & Filtering & Grouping on Aggregation
{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","count(*)"],"columnDataTypes":["LONG","INT","LONG"]},"rows":[[-9223372036854775808,-2147483648,7],[11,32,14],[11,42,14],[41,42,7],[41,52,7]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":28,"numEntriesScannedPostFilter":56,"numGroupsLimitReached":false,"totalDocs":66,"timeUsedMs":6,"offlineThreadCpuTimeNs":0,"realti
[...]
@@ -37,7 +37,7 @@
{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"],"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,7,7,0.0,57.0,0.0,70.0],[11,32,14,28,22.100000381469727,67.0,0.0,10.0],[11,42,14,28,22.100000381469727,67.0,0.0,10.0],[41,42,7,14,24.100000381469727,77.0,0.0,10.0],[41,5
[...]
{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1,
80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1,
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1,
80.0)","percentiletdigestmv(intDimMV1,
90.0)"],"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720368547
[...]
{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1,
80.01)","percentilemv(intDimMV1, 99.99)","percentileest(longDimSV1,
80.01)","percentileestmv(intDimMV1, 99.99)","percentiletdigest(longDimSV1,
80.01)","percentiletdigestmv(intDimMV1,
99.99)"],"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720
[...]
-{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"],"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[11,32,1,2,1,2],[11,42,1,2,1,2],[41,42,1,2,1,2],[41,52,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMa
[...]
+{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"],"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[11,32,1,2,1,2,1],[11,42,1,2,1,2,1],[41,42,1,2,1,2,1],[41,52,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSe
[...]
# Transformation Functions
{"resultTable":{"dataSchema":{"columnNames":["add(longDimSV1,sub(longDimSV2,'3'))","mod(intMetric1,'10')","div(doubleMetric1,mult(floatMetric1,'5'))"],"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE"]},"rows":[[15229.0,1.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[13540.0,7.0,0.20076306285631254],[60.0,4.0,0.1999999968342762],[29.0,0.0,0.20904977014723267],[0.0,0.0,0.21652891879345226],[-9.223372036854776E18,0.0,"Infinity"]]},"exceptions":[],"numServersQueried":1,"numServersRe
[...]
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
index 4a9a846acd..9f250aff8a 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
@@ -985,8 +985,8 @@ public class DistinctCountThetaSketchAggregationFunction
int numAccumulators = acc1.size();
List<ThetaSketchAccumulator> mergedAccumulators = new
ArrayList<>(numAccumulators);
for (int i = 0; i < numAccumulators; i++) {
- ThetaSketchAccumulator thetaSketchAccumulator1 = acc1.get(i);
- ThetaSketchAccumulator thetaSketchAccumulator2 = acc2.get(i);
+ ThetaSketchAccumulator thetaSketchAccumulator1 =
convertSketchAccumulator(acc1.get(i));
+ ThetaSketchAccumulator thetaSketchAccumulator2 =
convertSketchAccumulator(acc2.get(i));
if (thetaSketchAccumulator1.isEmpty()) {
mergedAccumulators.add(thetaSketchAccumulator2);
continue;
@@ -1025,6 +1025,20 @@ public class DistinctCountThetaSketchAggregationFunction
return
Math.round(evaluatePostAggregationExpression(_postAggregationExpression,
mergedSketches).getEstimate());
}
+ // This ensures backward compatibility with servers that still return
sketches directly.
+ // The AggregationDataTableReducer casts intermediate results to Objects and
although the code compiles,
+ // types might still be incompatible at runtime due to type erasure.
+ // Due to performance overheads of redundant casts, this should be removed
at some future point.
+ private ThetaSketchAccumulator convertSketchAccumulator(Object mergeResult) {
+ if (mergeResult instanceof Sketch) {
+ Sketch sketch = (Sketch) mergeResult;
+ ThetaSketchAccumulator accumulator = new
ThetaSketchAccumulator(_setOperationBuilder, _accumulatorThreshold);
+ accumulator.apply(sketch);
+ return accumulator;
+ }
+ return (ThetaSketchAccumulator) mergeResult;
+ }
+
/**
* Helper method to collect expressions in the filter.
*/
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]