[pinot] branch master updated: [multistage] test coverage for CASE and fix bug with CHAR literals (#9846)

rongr Tue, 29 Nov 2022 16:10:32 -0800

This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git



The following commit(s) were added to refs/heads/master by this push:
     new a22250270d [multistage] test coverage for CASE and fix bug with CHAR 
literals (#9846)
a22250270d is described below

commit a22250270da250b03943624eae2a9466164b864c
Author: Almog Gavra <[email protected]>
AuthorDate: Tue Nov 29 16:10:23 2022 -0800

    [multistage] test coverage for CASE and fix bug with CHAR literals (#9846)
    
    * [multistage] test coverage for CASE and fix bug with CHAR literals
    
    * address feedback
---
 .../org/apache/pinot/query/type/TypeFactory.java   |  3 +-
 .../org/apache/pinot/query/type/TypeSystem.java    | 15 ++++-
 .../runtime/queries/ResourceBasedQueriesTest.java  |  3 +-
 .../src/test/resources/queries/Case.json           | 77 ++++++++++++++++++++++
 4 files changed, 93 insertions(+), 5 deletions(-)

diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
index 9354ccfb95..0e1814e8c9 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
@@ -38,10 +38,9 @@ import org.apache.pinot.spi.data.Schema;
  * upgrading Calcite versions.
  */
 public class TypeFactory extends JavaTypeFactoryImpl {
-  private final RelDataTypeSystem _typeSystem;
 
   public TypeFactory(RelDataTypeSystem typeSystem) {
-    _typeSystem = typeSystem;
+    super(typeSystem);
   }
 
   public RelDataType createRelDataTypeFromSchema(Schema schema) {
diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeSystem.java 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeSystem.java
index b2606d9296..ea6a9e7a35 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeSystem.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeSystem.java
@@ -23,8 +23,19 @@ import org.apache.calcite.rel.type.RelDataTypeSystemImpl;
 
 /**
  * The {@code TypeSystem} overwrites Calcite type system with Pinot specific 
logics.
- *
- * TODO: no overwrite for now.
  */
 public class TypeSystem extends RelDataTypeSystemImpl {
+
+  @Override
+  public boolean shouldConvertRaggedUnionTypesToVarying() {
+    // A "ragged" union refers to a union of two or more data types that don't 
all
+    // have the same precision or scale. In these cases, Calcite may need to 
promote
+    // one or more of the data types in order to maintain consistency.
+    //
+    // Pinot doesn't properly handle CHAR(FIXED) - by default, Calcite will 
cast a
+    // CHAR(2) to a CHAR(3), but this will cause 2-char strings to be expanded 
with
+    // spaces at the end (e.g. 'No' -> 'No '), which ultimately causes 
incorrect
+    // behavior. This calcite flag will cause this to be cast to VARCHAR 
instead
+    return true;
+  }
 }
diff --git 
a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
 
b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
index c4541b1528..59831d04c4 100644
--- 
a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
+++ 
b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
@@ -73,7 +73,8 @@ public class ResourceBasedQueriesTest extends 
QueryRunnerTestBase {
       "TimeTypes.json",
       "BooleanLogic.json",
       "Comparisons.json",
-      "Aggregates.json"
+      "Aggregates.json",
+      "Case.json"
   );
 
   @BeforeClass
diff --git a/pinot-query-runtime/src/test/resources/queries/Case.json 
b/pinot-query-runtime/src/test/resources/queries/Case.json
new file mode 100644
index 0000000000..449f602f71
--- /dev/null
+++ b/pinot-query-runtime/src/test/resources/queries/Case.json
@@ -0,0 +1,77 @@
+{
+  "case_when_test": {
+    "tables": {
+      "tbl1": {
+        "schema":[
+          {"name": "intCol", "type": "INT"},
+          {"name": "boolCol", "type": "BOOLEAN"},
+          {"name": "floatCol", "type": "FLOAT"},
+          {"name": "doubleCol", "type": "DOUBLE"},
+          {"name": "strCol", "type": "STRING"}
+        ],
+        "inputs": [
+          [1, true, 2.0, 3.0, "one"],
+          [2, false, 4.0, 6.0, "two"],
+          [3, true, 6.0, 9.0, "three"],
+          [4, false, 8.0, 12.0, "four"]
+        ]
+      }
+    },
+    "queries": [
+      { "sql": "SELECT intCol, CASE WHEN boolCol = true THEN 'Yes' ELSE 'No' 
END AS boolVal, strCol FROM {tbl1}"},
+      {
+        "ignored": true,
+        "comment": "should return NULL, but returns '' (empty string) when 
case is not matched",
+        "sql": "SELECT intCol, CASE WHEN boolCol = true THEN 'Yes' END AS 
boolVal, strCol FROM {tbl1}"
+      },
+      { "sql": "SELECT intCol FROM {tbl1} WHERE CASE WHEN strCol = 'one' THEN 
true ELSE boolCol END"},
+      { "sql": "SELECT intCol, CASE WHEN strCol = 'one' THEN 'The first one' 
WHEN strCol = 'two' THEN 'The second one' WHEN strCol = 'three' THEN 'The third 
one' WHEN strCol = 'four' THEN 'The fourth one' ELSE 'Unknown' END AS stringVal 
FROM {tbl1}"},
+      { "sql": "SELECT intCol, CASE WHEN intCol % 2 = 0 THEN intCol ELSE 
intCol * 2 END AS intVal, strCol FROM {tbl1}"},
+      { "sql": "SELECT intCol, CASE WHEN floatCol > 4.0 THEN floatCol ELSE 
floatCol / 2.0 END AS floatVal, strCol FROM {tbl1}"},
+      { "sql": "SELECT intCol, CASE WHEN doubleCol > 6.0 THEN doubleCol ELSE 
doubleCol / 2.0 END AS doubleVal, strCol FROM {tbl1}"},
+      { "sql": "SELECT intCol, CASE WHEN (SELECT SUM(floatCol) FROM {tbl1}) > 
16.0 THEN 'Large sum' ELSE 'Small sum' END AS aggVal, strCol FROM {tbl1}"}
+    ]
+  },
+  "nested_case_when_test": {
+    "tables": {
+      "tbl1": {
+        "schema":[
+          {"name": "primary_key", "type": "INT"},
+          {"name": "description", "type": "STRING"}
+        ],
+        "inputs": [
+          [1, "Item one"],
+          [2, "Item two"],
+          [3, "Item three"],
+          [4, "Item four"]
+        ]
+      },
+      "tbl2": {
+        "schema":[
+          {"name": "primary_key", "type": "INT"},
+          {"name": "attribute", "type": "STRING"}
+        ],
+        "inputs": [
+          [1, "apple"],
+          [2, "chocolate"],
+          [3, "red"],
+          [4, "thirty"]
+        ]
+      }
+    },
+    "queries": [
+      {
+        "sql": "SELECT {tbl1}.primary_key, {tbl1}.description, CASE WHEN 
{tbl2}.attribute = 'red' THEN 'Color' ELSE 'Non-color' END AS attribute FROM 
{tbl1} JOIN {tbl2} ON {tbl1}.primary_key = {tbl2}.primary_key",
+        "description": "Joins the two tables and categorizes attributes from 
tbl2 as either 'Color' or 'Non-color'"
+      },
+      {
+        "sql": "SELECT {tbl1}.primary_key, CASE WHEN {tbl1}.description = 
'Item one' THEN {tbl2}.attribute ELSE {tbl1}.description END AS description 
FROM {tbl1} JOIN {tbl2} ON {tbl1}.primary_key = {tbl2}.primary_key",
+        "description": "Joins the two tables and selects either the attribute 
from tbl2 or the description from tbl1, depending on the description from tbl1"
+      },
+      {
+        "sql": "SELECT {tbl1}.primary_key, SUM(CASE WHEN {tbl2}.attribute = 
'chocolate' THEN 1 ELSE 0 END) as chocolate_count FROM {tbl1} JOIN {tbl2} ON 
{tbl1}.primary_key = {tbl2}.primary_key GROUP BY {tbl1}.primary_key",
+        "description": "Joins the two tables and aggregates the number of 
times 'chocolate' appears as an attribute in tbl2"
+      }
+    ]
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[pinot] branch master updated: [multistage] test coverage for CASE and fix bug with CHAR literals (#9846)

Reply via email to