This is an automated email from the ASF dual-hosted git repository.
gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 840535f460 Map Pinot's FLOAT data type to Calcite SQL's REAL type to
fix precision issues (#13616)
840535f460 is described below
commit 840535f460cba99b87cb44d7ed5eddafd0d97107
Author: Yash Mayya <[email protected]>
AuthorDate: Thu Jul 18 13:49:53 2024 +0530
Map Pinot's FLOAT data type to Calcite SQL's REAL type to fix precision
issues (#13616)
---
.../queries/feature-test-multi-stage.queries | 2 +-
.../query-results/feature-test-multi-stage.results | 2 +-
.../pinot/integration/tests/custom/ArrayTest.java | 17 +++------
.../tests/custom/FloatingPointDataTypeTest.java | 24 ++++++------
.../org/apache/pinot/query/type/TypeFactory.java | 21 +----------
.../apache/pinot/query/type/TypeFactoryTest.java | 44 +++++++---------------
6 files changed, 34 insertions(+), 76 deletions(-)
diff --git
a/compatibility-verifier/multi-stage-query-engine-test-suite/config/queries/feature-test-multi-stage.queries
b/compatibility-verifier/multi-stage-query-engine-test-suite/config/queries/feature-test-multi-stage.queries
index 00d0c6cb8a..f070318c68 100644
---
a/compatibility-verifier/multi-stage-query-engine-test-suite/config/queries/feature-test-multi-stage.queries
+++
b/compatibility-verifier/multi-stage-query-engine-test-suite/config/queries/feature-test-multi-stage.queries
@@ -20,7 +20,7 @@
# Joins
SELECT COUNT(*) FROM FeatureTest1 ft1 INNER JOIN FeatureTest2 ft2 ON
ft1.stringDimSV1 = ft2.stringDimSV1 WHERE ft1.generationNumber =
__GENERATION_NUMBER__ AND ft2.generationNumber = __GENERATION_NUMBER__
SELECT ft1.stringDimSV1, COUNT(ft1.stringDimSV1) FROM FeatureTest1 ft1 INNER
JOIN FeatureTest2 ft2 ON ft1.stringDimSV1 = ft2.stringDimSV1 WHERE
ft1.generationNumber = __GENERATION_NUMBER__ AND ft2.generationNumber =
__GENERATION_NUMBER__ GROUP BY ft1.stringDimSV1
-SELECT ft1.stringDimSV2, SUM(ft1.floatMetric1) FROM FeatureTest1 ft1 INNER
JOIN FeatureTest2 ft2 ON ft1.stringDimSV1 = ft2.stringDimSV1 WHERE
ft1.generationNumber = __GENERATION_NUMBER__ AND ft2.generationNumber =
__GENERATION_NUMBER__ GROUP BY ft1.stringDimSV2
+SELECT ft1.stringDimSV2, SUM(ft1.doubleMetric1) FROM FeatureTest1 ft1 INNER
JOIN FeatureTest2 ft2 ON ft1.stringDimSV1 = ft2.stringDimSV1 WHERE
ft1.generationNumber = __GENERATION_NUMBER__ AND ft2.generationNumber =
__GENERATION_NUMBER__ GROUP BY ft1.stringDimSV2
SELECT ft1.stringDimSV1 FROM FeatureTest1 ft1 WHERE ft1.generationNumber =
__GENERATION_NUMBER__ AND EXISTS (SELECT 1 FROM FeatureTest2 ft2 WHERE
ft2.generationNumber = __GENERATION_NUMBER__ AND ft2.stringDimSV2 =
ft1.stringDimSV1)
# Set operations
diff --git
a/compatibility-verifier/multi-stage-query-engine-test-suite/config/query-results/feature-test-multi-stage.results
b/compatibility-verifier/multi-stage-query-engine-test-suite/config/query-results/feature-test-multi-stage.results
index b073dd9e1c..d809fc4751 100644
---
a/compatibility-verifier/multi-stage-query-engine-test-suite/config/query-results/feature-test-multi-stage.results
+++
b/compatibility-verifier/multi-stage-query-engine-test-suite/config/query-results/feature-test-multi-stage.results
@@ -20,7 +20,7 @@
# Joins
{"resultTable":{"dataSchema":{"columnNames":["EXPR$0"],"columnDataTypes":["LONG"]},"rows":[[130]]},"requestId":"11345778000000000","stageStats":{"type":"MAILBOX_RECEIVE","executionTimeMs":36,"emittedRows":1,"fanIn":1,"rawMessages":2,"deserializedBytes":714,"upstreamWaitMs":40,"children":[{"type":"MAILBOX_SEND","executionTimeMs":36,"emittedRows":1,"stage":1,"parallelism":1,"fanOut":1,"rawMessages":2,"serializedBytes":99,"serializationTimeMs":2,"children":[{"type":"AGGREGATE","executionTim
[...]
{"resultTable":{"dataSchema":{"columnNames":["stringDimSV1","EXPR$1"],"columnDataTypes":["STRING","LONG"]},"rows":[["s1-4",7],["s1-6",54],["s1-2",28],["s1-0",28],["s1-5",7],["s1-7",6]]},"requestId":"11345778000000001","stageStats":{"type":"MAILBOX_RECEIVE","executionTimeMs":3,"emittedRows":6,"fanIn":1,"rawMessages":2,"deserializedBytes":824,"upstreamWaitMs":3,"children":[{"type":"MAILBOX_SEND","executionTimeMs":2,"emittedRows":6,"stage":1,"parallelism":1,"fanOut":1,"rawMessages":2,"seria
[...]
-{"resultTable":{"dataSchema":{"columnNames":["stringDimSV2","EXPR$1"],"columnDataTypes":["STRING","DOUBLE"]},"rows":[["s2-6",14153.4],["s2-2",618.8],["s2-0",338.8],["s2-4",168.7],["s2-7",1572.6],["null",0.0]]},"requestId":"11345778000000002","stageStats":{"type":"MAILBOX_RECEIVE","executionTimeMs":2,"emittedRows":6,"fanIn":1,"rawMessages":2,"deserializedBytes":826,"upstreamWaitMs":3,"children":[{"type":"MAILBOX_SEND","executionTimeMs":4,"emittedRows":6,"stage":1,"parallelism":1,"fanOut":
[...]
+{"resultTable":{"dataSchema":{"columnNames":["stringDimSV2","EXPR$1"],"columnDataTypes":["STRING","DOUBLE"]},"rows":[["s2-6",14207.4],["s2-2",646.8],["s2-0",366.8],["s2-4",168.7],["s2-7",1578.6],["null",168.7]]},"requestId":"11345778000000002","stageStats":{"type":"MAILBOX_RECEIVE","executionTimeMs":2,"emittedRows":6,"fanIn":1,"rawMessages":2,"deserializedBytes":826,"upstreamWaitMs":3,"children":[{"type":"MAILBOX_SEND","executionTimeMs":4,"emittedRows":6,"stage":1,"parallelism":1,"fanOut
[...]
{"resultTable":{"dataSchema":{"columnNames":["stringDimSV1"],"columnDataTypes":["STRING"]},"rows":[]},"requestId":"11345778000000003","stageStats":{"type":"MAILBOX_RECEIVE","fanIn":1,"rawMessages":1,"deserializedBytes":132,"children":[{"type":"MAILBOX_SEND","stage":1,"parallelism":1,"fanOut":1,"rawMessages":1,"children":[{"type":"LEAF","table":"FeatureTest1","numSegmentsQueried":1,"totalDocs":10,"numSegmentsPrunedByServer":1}]}]},"brokerId":"Broker_192.168.29.25_8099","exceptions":[],"nu
[...]
# Set operations
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
index fa163ff882..921dc6e45e 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
@@ -72,8 +72,7 @@ public class ArrayTest extends
CustomDataQueryClusterIntegrationTest {
+ "arrayAgg(boolCol, 'BOOLEAN'), "
+ "arrayAgg(intCol, 'INT'), "
+ "arrayAgg(longCol, 'LONG'), "
- // NOTE: FLOAT array is auto converted to DOUBLE array
- + (useMultiStageQueryEngine ? "arrayAgg(floatCol, 'DOUBLE'), " :
"arrayAgg(floatCol, 'FLOAT'), ")
+ + "arrayAgg(floatCol, 'FLOAT'), "
+ "arrayAgg(doubleCol, 'DOUBLE'), "
+ "arrayAgg(stringCol, 'STRING'), "
+ "arrayAgg(timestampCol, 'TIMESTAMP') "
@@ -97,8 +96,7 @@ public class ArrayTest extends
CustomDataQueryClusterIntegrationTest {
+ "arrayAgg(boolCol, 'BOOLEAN'), "
+ "arrayAgg(intCol, 'INT'), "
+ "arrayAgg(longCol, 'LONG'), "
- // NOTE: FLOAT array is auto converted to DOUBLE array
- + (useMultiStageQueryEngine ? "arrayAgg(floatCol, 'DOUBLE'), " :
"arrayAgg(floatCol, 'FLOAT'), ")
+ + "arrayAgg(floatCol, 'FLOAT'), "
+ "arrayAgg(doubleCol, 'DOUBLE'), "
+ "arrayAgg(stringCol, 'STRING'), "
+ "arrayAgg(timestampCol, 'TIMESTAMP') "
@@ -126,8 +124,7 @@ public class ArrayTest extends
CustomDataQueryClusterIntegrationTest {
+ "arrayAgg(boolCol, 'BOOLEAN'), "
+ "arrayAgg(intCol, 'INT'), "
+ "arrayAgg(longCol, 'LONG'), "
- // NOTE: FLOAT array is auto converted to DOUBLE array
- + (useMultiStageQueryEngine ? "arrayAgg(floatCol, 'DOUBLE'), " :
"arrayAgg(floatCol, 'FLOAT'), ")
+ + "arrayAgg(floatCol, 'FLOAT'), "
+ "arrayAgg(doubleCol, 'DOUBLE'), "
+ "arrayAgg(stringCol, 'STRING'), "
+ "arrayAgg(timestampCol, 'TIMESTAMP'), "
@@ -374,9 +371,7 @@ public class ArrayTest extends
CustomDataQueryClusterIntegrationTest {
+ "arrayAgg(boolCol, 'BOOLEAN', true), "
+ "arrayAgg(intCol, 'INT', true), "
+ "arrayAgg(longCol, 'LONG', true), "
- // NOTE: FLOAT array is auto converted to DOUBLE array
- + (useMultiStageQueryEngine ? "arrayAgg(floatCol, 'DOUBLE', true),
"
- : "arrayAgg(floatCol, 'FLOAT', true), ")
+ + "arrayAgg(floatCol, 'FLOAT', true), "
+ "arrayAgg(doubleCol, 'DOUBLE', true), "
+ "arrayAgg(stringCol, 'STRING', true), "
+ "arrayAgg(timestampCol, 'TIMESTAMP', true) "
@@ -404,9 +399,7 @@ public class ArrayTest extends
CustomDataQueryClusterIntegrationTest {
+ "arrayAgg(boolCol, 'BOOLEAN', true), "
+ "arrayAgg(intCol, 'INT', true), "
+ "arrayAgg(longCol, 'LONG', true), "
- // NOTE: FLOAT array is auto converted to DOUBLE array
- + (useMultiStageQueryEngine ? "arrayAgg(floatCol, 'DOUBLE', true),
"
- : "arrayAgg(floatCol, 'FLOAT', true), ")
+ + "arrayAgg(floatCol, 'FLOAT', true), "
+ "arrayAgg(doubleCol, 'DOUBLE', true), "
+ "arrayAgg(stringCol, 'STRING', true), "
+ "arrayAgg(timestampCol, 'TIMESTAMP', true), "
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/FloatingPointDataTypeTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/FloatingPointDataTypeTest.java
index 7842eca856..6c76127021 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/FloatingPointDataTypeTest.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/FloatingPointDataTypeTest.java
@@ -153,27 +153,27 @@ public class FloatingPointDataTypeTest extends
CustomDataQueryClusterIntegration
{MET_DOUBLE_SORTED + " > 0.05", "4"},
{MET_DOUBLE_SORTED + " = 0.05", "1"},
{MET_DOUBLE_SORTED + " < 0.05", "5"},
- {MET_FLOAT_SORTED + " > 0.05", "4"},
- {MET_FLOAT_SORTED + " = 0.05", "1"},
- {MET_FLOAT_SORTED + " < 0.05", "5"},
+ {MET_FLOAT_SORTED + " > CAST(0.05 AS FLOAT)", "4"},
+ {MET_FLOAT_SORTED + " = CAST(0.05 AS FLOAT)", "1"},
+ {MET_FLOAT_SORTED + " < CAST(0.05 AS FLOAT)", "5"},
{MET_DOUBLE_UNSORTED + " > 0.05", "4"},
{MET_DOUBLE_UNSORTED + " = 0.05", "1"},
{MET_DOUBLE_UNSORTED + " < 0.05", "5"},
- {MET_FLOAT_UNSORTED + " > 0.05", "4"},
- {MET_FLOAT_UNSORTED + " = 0.05", "1"},
- {MET_FLOAT_UNSORTED + " < 0.05", "5"},
+ {MET_FLOAT_UNSORTED + " > CAST(0.05 AS FLOAT)", "4"},
+ {MET_FLOAT_UNSORTED + " = CAST(0.05 AS FLOAT)", "1"},
+ {MET_FLOAT_UNSORTED + " < CAST(0.05 AS FLOAT)", "5"},
{MET_DOUBLE_SORTED_NO_DIC + " > 0.05", "4"},
{MET_DOUBLE_SORTED_NO_DIC + " = 0.05", "1"},
{MET_DOUBLE_SORTED_NO_DIC + " < 0.05", "5"},
- {MET_FLOAT_SORTED_NO_DIC + " > 0.05", "4"},
- {MET_FLOAT_SORTED_NO_DIC + " = 0.05", "1"},
- {MET_FLOAT_SORTED_NO_DIC + " < 0.05", "5"},
+ {MET_FLOAT_SORTED_NO_DIC + " > CAST(0.05 AS FLOAT)", "4"},
+ {MET_FLOAT_SORTED_NO_DIC + " = CAST(0.05 AS FLOAT)", "1"},
+ {MET_FLOAT_SORTED_NO_DIC + " < CAST(0.05 AS FLOAT)", "5"},
{MET_DOUBLE_UNSORTED_NO_DIC + " > 0.05", "4"},
{MET_DOUBLE_UNSORTED_NO_DIC + " = 0.05", "1"},
{MET_DOUBLE_UNSORTED_NO_DIC + " < 0.05", "5"},
- {MET_FLOAT_UNSORTED_NO_DIC + " > 0.05", "4"},
- {MET_FLOAT_UNSORTED_NO_DIC + " = 0.05", "1"},
- {MET_FLOAT_UNSORTED_NO_DIC + " < 0.05", "5"},
+ {MET_FLOAT_UNSORTED_NO_DIC + " > CAST(0.05 AS FLOAT)", "4"},
+ {MET_FLOAT_UNSORTED_NO_DIC + " = CAST(0.05 AS FLOAT)", "1"},
+ {MET_FLOAT_UNSORTED_NO_DIC + " < CAST(0.05 AS FLOAT)", "5"},
};
for (String[] faec : filterAndExpectedCount) {
String filter = faec[0];
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
index 8e3ec003e8..e852a2cb24 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
@@ -72,24 +72,8 @@ public class TypeFactory extends JavaTypeFactoryImpl {
return SqlTypeName.INTEGER;
case LONG:
return SqlTypeName.BIGINT;
- // Map float and double to the same RelDataType so that queries like
- // `select count(*) from table where aFloatColumn = 0.05` works
correctly in multi-stage query engine.
- //
- // If float and double are mapped to different RelDataType,
- // `select count(*) from table where aFloatColumn = 0.05` will be
converted to
- // `select count(*) from table where CAST(aFloatColumn as "DOUBLE") =
0.05`. While casting
- // from float to double does not always produce the same double value as
the original float value, this leads to
- // wrong query result.
- //
- // With float and double mapped to the same RelDataType, the behavior in
multi-stage query engine will be the same
- // as the query in v1 query engine.
case FLOAT:
- if (fieldSpec.isSingleValueField()) {
- return SqlTypeName.DOUBLE;
- } else {
- // TODO: This may be wrong. The reason why we want to use DOUBLE in
single value float may also apply here
- return SqlTypeName.REAL;
- }
+ return SqlTypeName.REAL;
case DOUBLE:
return SqlTypeName.DOUBLE;
case BOOLEAN:
@@ -97,13 +81,12 @@ public class TypeFactory extends JavaTypeFactoryImpl {
case TIMESTAMP:
return SqlTypeName.TIMESTAMP;
case STRING:
+ case JSON:
return SqlTypeName.VARCHAR;
case BYTES:
return SqlTypeName.VARBINARY;
case BIG_DECIMAL:
return SqlTypeName.DECIMAL;
- case JSON:
- return SqlTypeName.VARCHAR;
case LIST:
// TODO: support LIST, MV column should go fall into this category.
case STRUCT:
diff --git
a/pinot-query-planner/src/test/java/org/apache/pinot/query/type/TypeFactoryTest.java
b/pinot-query-planner/src/test/java/org/apache/pinot/query/type/TypeFactoryTest.java
index fdc3dae3e0..24f94e15c4 100644
---
a/pinot-query-planner/src/test/java/org/apache/pinot/query/type/TypeFactoryTest.java
+++
b/pinot-query-planner/src/test/java/org/apache/pinot/query/type/TypeFactoryTest.java
@@ -47,7 +47,6 @@ public class TypeFactoryTest {
for (FieldSpec.DataType dataType : FieldSpec.DataType.values()) {
RelDataType basicType;
- RelDataType arrayType = null;
switch (dataType) {
case INT: {
basicType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER);
@@ -57,20 +56,8 @@ public class TypeFactoryTest {
basicType = TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT);
break;
}
- // Map float and double to the same RelDataType so that queries like
- // `select count(*) from table where aFloatColumn = 0.05` works
correctly in multi-stage query engine.
- //
- // If float and double are mapped to different RelDataType,
- // `select count(*) from table where aFloatColumn = 0.05` will be
converted to
- // `select count(*) from table where CAST(aFloatColumn as "DOUBLE") =
0.05`. While casting
- // from float to double does not always produce the same double value
as the original float value, this leads to
- // wrong query result.
- //
- // With float and double mapped to the same RelDataType, the behavior
in multi-stage query engine will be the
- // same as the query in v1 query engine.
case FLOAT: {
- basicType = TYPE_FACTORY.createSqlType(SqlTypeName.DOUBLE);
- arrayType = TYPE_FACTORY.createSqlType(SqlTypeName.REAL);
+ basicType = TYPE_FACTORY.createSqlType(SqlTypeName.REAL);
break;
}
case DOUBLE: {
@@ -107,18 +94,14 @@ public class TypeFactoryTest {
String message = String.format("Unsupported type: %s ", dataType);
throw new UnsupportedOperationException(message);
}
- if (arrayType == null) {
- arrayType = basicType;
- }
- cases.add(new Object[]{dataType, basicType, arrayType, true});
- cases.add(new Object[]{dataType, basicType, arrayType, false});
+ cases.add(new Object[]{dataType, basicType, true});
+ cases.add(new Object[]{dataType, basicType, false});
}
return cases.iterator();
}
@Test(dataProvider = "relDataTypeConversion")
- public void testScalarTypes(FieldSpec.DataType dataType, RelDataType
scalarType, RelDataType arrayType,
- boolean columnNullMode) {
+ public void testScalarTypes(FieldSpec.DataType dataType, RelDataType
scalarType, boolean columnNullMode) {
TypeFactory typeFactory = new TypeFactory();
Schema testSchema = new Schema.SchemaBuilder()
.addSingleValueDimension("col", dataType)
@@ -132,8 +115,7 @@ public class TypeFactoryTest {
}
@Test(dataProvider = "relDataTypeConversion")
- public void testNullableScalarTypes(FieldSpec.DataType dataType, RelDataType
scalarType, RelDataType arrayType,
- boolean columnNullMode) {
+ public void testNullableScalarTypes(FieldSpec.DataType dataType, RelDataType
scalarType, boolean columnNullMode) {
TypeFactory typeFactory = new TypeFactory();
Schema testSchema = new Schema.SchemaBuilder()
.addDimensionField("col", dataType, field -> field.setNullable(true))
@@ -151,8 +133,7 @@ public class TypeFactoryTest {
@Test(dataProvider = "relDataTypeConversion")
- public void testNotNullableScalarTypes(FieldSpec.DataType dataType,
RelDataType scalarType, RelDataType arrayType,
- boolean columnNullMode) {
+ public void testNotNullableScalarTypes(FieldSpec.DataType dataType,
RelDataType scalarType, boolean columnNullMode) {
TypeFactory typeFactory = new TypeFactory();
Schema testSchema = new Schema.SchemaBuilder()
.addDimensionField("col", dataType, field -> field.setNullable(false))
@@ -170,8 +151,7 @@ public class TypeFactoryTest {
}
@Test(dataProvider = "relDataTypeConversion")
- public void testArrayTypes(FieldSpec.DataType dataType, RelDataType
scalarType, RelDataType arrayType,
- boolean columnNullMode) {
+ public void testArrayTypes(FieldSpec.DataType dataType, RelDataType
arrayType, boolean columnNullMode) {
TypeFactory typeFactory = new TypeFactory();
Schema testSchema = new Schema.SchemaBuilder()
.addMultiValueDimension("col", dataType)
@@ -189,8 +169,7 @@ public class TypeFactoryTest {
}
@Test(dataProvider = "relDataTypeConversion")
- public void testNullableArrayTypes(FieldSpec.DataType dataType, RelDataType
scalarType, RelDataType arrayType,
- boolean columnNullMode) {
+ public void testNullableArrayTypes(FieldSpec.DataType dataType, RelDataType
arrayType, boolean columnNullMode) {
TypeFactory typeFactory = new TypeFactory();
Schema testSchema = new Schema.SchemaBuilder()
.addDimensionField("col", dataType, field -> {
@@ -211,8 +190,7 @@ public class TypeFactoryTest {
}
@Test(dataProvider = "relDataTypeConversion")
- public void testNotNullableArrayTypes(FieldSpec.DataType dataType,
RelDataType scalarType, RelDataType arrayType,
- boolean columnNullMode) {
+ public void testNotNullableArrayTypes(FieldSpec.DataType dataType,
RelDataType arrayType, boolean columnNullMode) {
TypeFactory typeFactory = new TypeFactory();
Schema testSchema = new Schema.SchemaBuilder()
.addDimensionField("col", dataType, field -> {
@@ -262,6 +240,10 @@ public class TypeFactoryTest {
checkPrecisionScale(field, bigIntBasicSqlType);
break;
case "FLOAT_COL":
+ BasicSqlType realBasicSqlType = new
BasicSqlType(TypeSystem.INSTANCE, SqlTypeName.REAL);
+ Assert.assertEquals(field.getType(), realBasicSqlType);
+ checkPrecisionScale(field, realBasicSqlType);
+ break;
case "DOUBLE_COL":
BasicSqlType doubleBasicSqlType = new
BasicSqlType(TypeSystem.INSTANCE, SqlTypeName.DOUBLE);
Assert.assertEquals(field.getType(), doubleBasicSqlType);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]