Repository: incubator-drill
Updated Branches:
  refs/heads/master 66cf6e274 -> c305c794a


DRILL-1470 : cast into varchar should recognize the length parameter in 
varchar. Fix casting function implementation: the length parameter should mean 
# of chars, not # of bytes.

New unit test case to verify the result from cast function.

Fix bug in cast into varchar. When target length = 0, it means we want to keep 
the input .

code clean up.

Include change for varbinary cast as well.


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/73f9827b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/73f9827b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/73f9827b

Branch: refs/heads/master
Commit: 73f9827bff061912d4cc81169488eda4dfa17234
Parents: 66cf6e2
Author: Jinfeng Ni <j...@maprtech.com>
Authored: Tue Nov 4 07:22:43 2014 -0800
Committer: Jinfeng Ni <j...@maprtech.com>
Committed: Tue Nov 11 13:14:59 2014 -0800

----------------------------------------------------------------------
 exec/java-exec/src/main/codegen/data/Casts.tdd  |  2 ++
 .../CastFunctionsSrcVarLenTargetVarLen.java     | 27 ++++++++++++++++--
 .../exec/expr/ExpressionTreeMaterializer.java   | 14 ++++++----
 .../org/apache/drill/TestExampleQueries.java    | 29 ++++++++++++++++++++
 4 files changed, 64 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/73f9827b/exec/java-exec/src/main/codegen/data/Casts.tdd
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/data/Casts.tdd 
b/exec/java-exec/src/main/codegen/data/Casts.tdd
index a4f5fb1..cdd6218 100644
--- a/exec/java-exec/src/main/codegen/data/Casts.tdd
+++ b/exec/java-exec/src/main/codegen/data/Casts.tdd
@@ -50,7 +50,9 @@
     {from: "Float8", to: "VarBinary", major: "TargetVarlen", javaType: 
"Double", bufferLength:"100"},     
     
     {from: "VarBinary", to: "VarChar", major: "SrcVarlenTargetVarlen"},
+    {from: "VarChar", to: "VarChar", major: "SrcVarlenTargetVarlen"},
     {from: "VarChar", to: "VarBinary", major: "SrcVarlenTargetVarlen"},
+    {from: "VarBinary", to: "VarBinary", major: "SrcVarlenTargetVarlen"},
 
     {from: "Date", to: "TimeStamp", major: "Date"},
     {from: "Date", to: "TimeStampTZ", major: "Date"},

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/73f9827b/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
 
b/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
index 0096be8..cd8f7bd 100644
--- 
a/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
+++ 
b/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
@@ -47,22 +47,43 @@ public class Cast${type.from}${type.to} implements 
DrillSimpleFunc{
 
   @Param ${type.from}Holder in;
   @Param BigIntHolder length;
-  @Workspace ByteBuf buffer;     
   @Output ${type.to}Holder out;
 
   public void setup(RecordBatch incoming) {
   }
 
   public void eval() {
+
+  <#if type.to == 'VarChar'>
+
+    //Do 1st scan to counter # of character in string.
+    int charCount = 
org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(in.buffer,
 in.start, in.end);
+
+    //if input length <= target_type length, do nothing
+    //else if target length = 0, it means cast wants all the characters in the 
input. Do nothing.
+    //else truncate based on target_type length.
+    out.buffer = in.buffer;
+    out.start =  in.start;
+    if (charCount <= length.value || length.value == 0 ) {
+      out.end = in.end;
+    } else {
+      out.end = 
org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(in.buffer,
 in.start, in.end, (int)length.value);
+    }
+
+  <#elseif type.to == 'VarBinary'>
+
     //if input length <= target_type length, do nothing
-    //else truncate based on target_type length.     
+    //else if target length = 0, it means cast wants all the bytes in the 
input. Do nothing.
+    //else truncate based on target_type length.
     out.buffer = in.buffer;
     out.start =  in.start;
-    if (in.end - in.start <= length.value) {
+    if (in.end - in.start <= length.value || length.value == 0 ) {
       out.end = in.end;
     } else {
       out.end = out.start + (int) length.value;
     }
+  </#if>
+
   }      
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/73f9827b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
index 9cec3a4..a2d5e10 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
@@ -523,7 +523,7 @@ public class ExpressionTreeMaterializer {
       MajorType newMajor = e.getMajorType();
       MinorType newMinor = input.getMajorType().getMinorType();
 
-      if (castEqual(e.getPosition(), newMajor, input.getMajorType())) {
+      if (castEqual(e.getPosition(), input.getMajorType(), newMajor)) {
         return input; // don't do pointless cast.
       }
 
@@ -601,11 +601,15 @@ public class ExpressionTreeMaterializer {
       case VAR16CHAR:
       case VARBINARY:
       case VARCHAR:
-        if (to.getWidth() < from.getWidth() && to.getWidth() > 0) {
-          this.errorCollector.addGeneralError(pos, "Casting from a longer 
variable length type to a shorter variable length type is not currently 
supported.");
-          return false;
-        } else {
+        // We could avoid redundant cast:
+        // 1) when "to" length is no smaller than "from" length and "from" 
length is known (>0),
+        // 2) or "to" length is unknown (0 means unknown length?).
+        // Case 1 and case 2 mean that cast will do nothing.
+        // In other cases, cast is required to trim the "from" according to 
"to" length.
+        if ( (to.getWidth() >= from.getWidth() && from.getWidth() > 0) || 
to.getWidth() == 0) {
           return true;
+        } else {
+          return false;
         }
 
       default:

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/73f9827b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java 
b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
index cd70b3c..e134a73 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
@@ -542,4 +542,33 @@ public class TestExampleQueries extends BaseTestQuery{
     assertEquals(String.format("Received unexepcted number of rows in output: 
expected=%d, received=%s",
         expectedRecordCount, actualRecordCount), expectedRecordCount, 
actualRecordCount);
   }
+
+  @Test // DRILL-1470
+  public void testCastToVarcharWithLength() throws Exception {
+    // cast from varchar with unknown length to a fixed length.
+    int actualRecordCount = testSql("select first_name from cp.`employee.json` 
where cast(first_name as varchar(2)) = 'Sh'");
+    int expectedRecordCount = 27;
+    assertEquals(String.format("Received unexepcted number of rows in output: 
expected=%d, received=%s",
+        expectedRecordCount, actualRecordCount), expectedRecordCount, 
actualRecordCount);
+
+    // cast from varchar with unknown length to varchar(5), then to 
varchar(10), then to varchar(2). Should produce the same result as the first 
query.
+    actualRecordCount = testSql("select first_name from cp.`employee.json` 
where cast(cast(cast(first_name as varchar(5)) as varchar(10)) as varchar(2)) = 
'Sh'");
+    expectedRecordCount = 27;
+    assertEquals(String.format("Received unexepcted number of rows in output: 
expected=%d, received=%s",
+        expectedRecordCount, actualRecordCount), expectedRecordCount, 
actualRecordCount);
+
+
+    // this long nested cast expression should be essentially equal to 
substr(), meaning the query should return every row in the table.
+    actualRecordCount = testSql("select first_name from cp.`employee.json` 
where cast(cast(cast(first_name as varchar(5)) as varchar(10)) as varchar(2)) = 
substr(first_name, 1, 2)");
+    expectedRecordCount = 1155;
+    assertEquals(String.format("Received unexepcted number of rows in output: 
expected=%d, received=%s",
+        expectedRecordCount, actualRecordCount), expectedRecordCount, 
actualRecordCount);
+
+    // cast is applied to a column from parquet file.
+    actualRecordCount = testSql("select n_name from cp.`tpch/nation.parquet` 
where cast(n_name as varchar(2)) = 'UN'");
+    expectedRecordCount = 2;
+    assertEquals(String.format("Received unexepcted number of rows in output: 
expected=%d, received=%s",
+        expectedRecordCount, actualRecordCount), expectedRecordCount, 
actualRecordCount);
+  }
+
 }

Reply via email to