This is an automated email from the ASF dual-hosted git repository.
rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new a22250270d [multistage] test coverage for CASE and fix bug with CHAR
literals (#9846)
a22250270d is described below
commit a22250270da250b03943624eae2a9466164b864c
Author: Almog Gavra <[email protected]>
AuthorDate: Tue Nov 29 16:10:23 2022 -0800
[multistage] test coverage for CASE and fix bug with CHAR literals (#9846)
* [multistage] test coverage for CASE and fix bug with CHAR literals
* address feedback
---
.../org/apache/pinot/query/type/TypeFactory.java | 3 +-
.../org/apache/pinot/query/type/TypeSystem.java | 15 ++++-
.../runtime/queries/ResourceBasedQueriesTest.java | 3 +-
.../src/test/resources/queries/Case.json | 77 ++++++++++++++++++++++
4 files changed, 93 insertions(+), 5 deletions(-)
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
index 9354ccfb95..0e1814e8c9 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
@@ -38,10 +38,9 @@ import org.apache.pinot.spi.data.Schema;
* upgrading Calcite versions.
*/
public class TypeFactory extends JavaTypeFactoryImpl {
- private final RelDataTypeSystem _typeSystem;
public TypeFactory(RelDataTypeSystem typeSystem) {
- _typeSystem = typeSystem;
+ super(typeSystem);
}
public RelDataType createRelDataTypeFromSchema(Schema schema) {
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeSystem.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeSystem.java
index b2606d9296..ea6a9e7a35 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeSystem.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeSystem.java
@@ -23,8 +23,19 @@ import org.apache.calcite.rel.type.RelDataTypeSystemImpl;
/**
* The {@code TypeSystem} overwrites Calcite type system with Pinot specific
logics.
- *
- * TODO: no overwrite for now.
*/
public class TypeSystem extends RelDataTypeSystemImpl {
+
+ @Override
+ public boolean shouldConvertRaggedUnionTypesToVarying() {
+ // A "ragged" union refers to a union of two or more data types that don't
all
+ // have the same precision or scale. In these cases, Calcite may need to
promote
+ // one or more of the data types in order to maintain consistency.
+ //
+ // Pinot doesn't properly handle CHAR(FIXED) - by default, Calcite will
cast a
+ // CHAR(2) to a CHAR(3), but this will cause 2-char strings to be expanded
with
+ // spaces at the end (e.g. 'No' -> 'No '), which ultimately causes
incorrect
+ // behavior. This calcite flag will cause this to be cast to VARCHAR
instead
+ return true;
+ }
}
diff --git
a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
index c4541b1528..59831d04c4 100644
---
a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
+++
b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
@@ -73,7 +73,8 @@ public class ResourceBasedQueriesTest extends
QueryRunnerTestBase {
"TimeTypes.json",
"BooleanLogic.json",
"Comparisons.json",
- "Aggregates.json"
+ "Aggregates.json",
+ "Case.json"
);
@BeforeClass
diff --git a/pinot-query-runtime/src/test/resources/queries/Case.json
b/pinot-query-runtime/src/test/resources/queries/Case.json
new file mode 100644
index 0000000000..449f602f71
--- /dev/null
+++ b/pinot-query-runtime/src/test/resources/queries/Case.json
@@ -0,0 +1,77 @@
+{
+ "case_when_test": {
+ "tables": {
+ "tbl1": {
+ "schema":[
+ {"name": "intCol", "type": "INT"},
+ {"name": "boolCol", "type": "BOOLEAN"},
+ {"name": "floatCol", "type": "FLOAT"},
+ {"name": "doubleCol", "type": "DOUBLE"},
+ {"name": "strCol", "type": "STRING"}
+ ],
+ "inputs": [
+ [1, true, 2.0, 3.0, "one"],
+ [2, false, 4.0, 6.0, "two"],
+ [3, true, 6.0, 9.0, "three"],
+ [4, false, 8.0, 12.0, "four"]
+ ]
+ }
+ },
+ "queries": [
+ { "sql": "SELECT intCol, CASE WHEN boolCol = true THEN 'Yes' ELSE 'No'
END AS boolVal, strCol FROM {tbl1}"},
+ {
+ "ignored": true,
+ "comment": "should return NULL, but returns '' (empty string) when
case is not matched",
+ "sql": "SELECT intCol, CASE WHEN boolCol = true THEN 'Yes' END AS
boolVal, strCol FROM {tbl1}"
+ },
+ { "sql": "SELECT intCol FROM {tbl1} WHERE CASE WHEN strCol = 'one' THEN
true ELSE boolCol END"},
+ { "sql": "SELECT intCol, CASE WHEN strCol = 'one' THEN 'The first one'
WHEN strCol = 'two' THEN 'The second one' WHEN strCol = 'three' THEN 'The third
one' WHEN strCol = 'four' THEN 'The fourth one' ELSE 'Unknown' END AS stringVal
FROM {tbl1}"},
+ { "sql": "SELECT intCol, CASE WHEN intCol % 2 = 0 THEN intCol ELSE
intCol * 2 END AS intVal, strCol FROM {tbl1}"},
+ { "sql": "SELECT intCol, CASE WHEN floatCol > 4.0 THEN floatCol ELSE
floatCol / 2.0 END AS floatVal, strCol FROM {tbl1}"},
+ { "sql": "SELECT intCol, CASE WHEN doubleCol > 6.0 THEN doubleCol ELSE
doubleCol / 2.0 END AS doubleVal, strCol FROM {tbl1}"},
+ { "sql": "SELECT intCol, CASE WHEN (SELECT SUM(floatCol) FROM {tbl1}) >
16.0 THEN 'Large sum' ELSE 'Small sum' END AS aggVal, strCol FROM {tbl1}"}
+ ]
+ },
+ "nested_case_when_test": {
+ "tables": {
+ "tbl1": {
+ "schema":[
+ {"name": "primary_key", "type": "INT"},
+ {"name": "description", "type": "STRING"}
+ ],
+ "inputs": [
+ [1, "Item one"],
+ [2, "Item two"],
+ [3, "Item three"],
+ [4, "Item four"]
+ ]
+ },
+ "tbl2": {
+ "schema":[
+ {"name": "primary_key", "type": "INT"},
+ {"name": "attribute", "type": "STRING"}
+ ],
+ "inputs": [
+ [1, "apple"],
+ [2, "chocolate"],
+ [3, "red"],
+ [4, "thirty"]
+ ]
+ }
+ },
+ "queries": [
+ {
+ "sql": "SELECT {tbl1}.primary_key, {tbl1}.description, CASE WHEN
{tbl2}.attribute = 'red' THEN 'Color' ELSE 'Non-color' END AS attribute FROM
{tbl1} JOIN {tbl2} ON {tbl1}.primary_key = {tbl2}.primary_key",
+ "description": "Joins the two tables and categorizes attributes from
tbl2 as either 'Color' or 'Non-color'"
+ },
+ {
+ "sql": "SELECT {tbl1}.primary_key, CASE WHEN {tbl1}.description =
'Item one' THEN {tbl2}.attribute ELSE {tbl1}.description END AS description
FROM {tbl1} JOIN {tbl2} ON {tbl1}.primary_key = {tbl2}.primary_key",
+ "description": "Joins the two tables and selects either the attribute
from tbl2 or the description from tbl1, depending on the description from tbl1"
+ },
+ {
+ "sql": "SELECT {tbl1}.primary_key, SUM(CASE WHEN {tbl2}.attribute =
'chocolate' THEN 1 ELSE 0 END) as chocolate_count FROM {tbl1} JOIN {tbl2} ON
{tbl1}.primary_key = {tbl2}.primary_key GROUP BY {tbl1}.primary_key",
+ "description": "Joins the two tables and aggregates the number of
times 'chocolate' appears as an attribute in tbl2"
+ }
+ ]
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]