DRILL-743: New String Functions

Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/53a89d69
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/53a89d69
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/53a89d69

Branch: refs/heads/master
Commit: 53a89d69dad990be4b64998bfc89d46fadfc8a0a
Parents: fed331b
Author: Yash Sharma <yash...@gmail.com>
Authored: Sat Jul 5 17:50:50 2014 +0530
Committer: Jacques Nadeau <jacq...@apache.org>
Committed: Mon Jul 7 15:52:35 2014 -0700

----------------------------------------------------------------------
 .../exec/expr/fn/impl/StringFunctionUtil.java   |  15 +++
 .../exec/expr/fn/impl/StringFunctions.java      | 134 +++++++++++++++++++
 .../exec/physical/impl/TestStringFunctions.java |   8 +-
 .../functions/string/testStringFuncs.json       |  44 ++++++
 4 files changed, 198 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/53a89d69/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionUtil.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionUtil.java
index 16ff8f2..6825309 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionUtil.java
@@ -87,4 +87,19 @@ public class StringFunctionUtil {
         + " at position " + idx + " encountered while decoding UTF8 string.");
   }
 
+  public static int utf8CharLen(byte currentByte) {
+    if (currentByte >= 0){                 // 1-byte char. First byte is 
0xxxxxxx.
+        return 1;
+    }
+    else if ((currentByte & 0xE0) == 0xC0 ){   // 2-byte char. First byte is 
110xxxxx
+        return 2;
+    }
+    else if ((currentByte & 0xF0) == 0xE0 ){   // 3-byte char. First byte is 
1110xxxx
+        return 3;
+    }
+    else if ((currentByte & 0xF8) == 0xF0){    //4-byte char. First byte is 
11110xxx
+        return 4;
+    }
+    throw new DrillRuntimeException("Unexpected byte 0x" + 
Integer.toString((int)currentByte & 0xff, 16) + " encountered while decoding 
UTF8 string.");
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/53a89d69/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index 92ac56f..fcb3b77 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -33,6 +33,8 @@ import org.apache.drill.exec.expr.holders.VarBinaryHolder;
 import org.apache.drill.exec.expr.holders.VarCharHolder;
 import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
 import org.apache.drill.exec.record.RecordBatch;
+import java.nio.charset.Charset;
+import org.apache.drill.exec.expr.holders.IntHolder;
 
 public class StringFunctions{
   static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StringFunctions.class);
@@ -942,4 +944,136 @@ public class StringFunctions{
     }
   }
 
+
+  /**
+   * Returns the value of the first byte of the input string, which is the ASCII code when the
+   * string starts with an ASCII character. Returns 0 for an empty string.
+   */
+  @FunctionTemplate(name = "ascii", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class AsciiString implements DrillSimpleFunc {
+
+    @Param  VarCharHolder in;
+    @Output IntHolder out;
+
+    public void setup(RecordBatch incoming) { }
+
+    public void eval() {
+      // An empty value has no first byte; reading at in.start would go past the end of the value.
+      if (in.end > in.start) {
+        // The byte is stored as-is (signed), so a non-ASCII leading byte gives a negative result.
+        out.value = in.buffer.getByte(in.start);
+      } else {
+        out.value = 0;
+      }
+    }
+  }
+
+  /**
+   * Produces a one-byte string whose only byte is set from the given integer code.
+   */
+  @FunctionTemplate(name = "chr", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class AsciiToChar implements DrillSimpleFunc {
+
+    @Param  IntHolder in;
+    @Output VarCharHolder out;
+    @Workspace ByteBuf buffer;
+
+    public void setup(RecordBatch incoming) {
+      // One-byte scratch buffer, created once here and handed out by every eval() call.
+      buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [1]);
+    }
+
+    public void eval() {
+      out.buffer = buffer;
+      out.buffer.setByte(0, in.value);
+      // The result is exactly the single byte at offset 0.
+      out.start = 0;
+      out.end = 1;
+    }
+  }
+
+  /**
+   * Returns the input byte sequence repeated nTimes. A negative nTimes is treated as zero and
+   * yields an empty string.
+   */
+  @FunctionTemplate(names = {"repeat", "repeatstr"}, scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class RepeatString implements DrillSimpleFunc {
+
+    @Param  VarCharHolder in;
+    @Param IntHolder nTimes;
+    @Output VarCharHolder out;
+    @Workspace ByteBuf buffer;
+
+    public void setup(RecordBatch incoming) {
+    }
+
+    public void eval() {
+      // A negative count would request a negative array size; clamp it to zero repetitions.
+      int num = nTimes.value < 0 ? 0 : nTimes.value;
+      int len = in.end - in.start;
+      byte[] bytea = new byte [len * num];
+      // Copy the whole input once per repetition into consecutive slices of the output array.
+      for (int copy = 0; copy < num; copy++) {
+        in.buffer.getBytes(in.start, bytea, copy * len, len);
+      }
+      out.buffer = io.netty.buffer.Unpooled.wrappedBuffer(bytea);
+      out.start = 0;
+      out.end = bytea.length;
+    }
+  }
+
+  /**
+   * Interprets the input bytes in the charset named by enc and returns the resulting text
+   * encoded as UTF-8.
+   */
+  @FunctionTemplate(name = "toascii", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class AsciiEndode implements DrillSimpleFunc {
+
+    @Param  VarCharHolder in;
+    @Param  VarCharHolder enc;
+    @Output VarCharHolder out;
+    @Workspace Charset inCharset;
+
+    public void setup(RecordBatch incoming) {
+      // Decode the charset name from the holder's bytes explicitly instead of relying on
+      // enc.toString(), whose result depends on how the holder class implements toString().
+      byte[] encName = new byte[enc.end - enc.start];
+      enc.buffer.getBytes(enc.start, encName, 0, encName.length);
+      inCharset = java.nio.charset.Charset.forName(new String(encName, com.google.common.base.Charsets.UTF_8));
+    }
+
+    public void eval() {
+      // Pull the raw input bytes out of the buffer in one call.
+      byte[] bytea = new byte[in.end - in.start];
+      in.buffer.getBytes(in.start, bytea, 0, bytea.length);
+      // Decode with the source charset, then re-encode as UTF-8.
+      byte[] outBytea = new String(bytea, inCharset).getBytes(com.google.common.base.Charsets.UTF_8);
+      out.buffer = io.netty.buffer.Unpooled.wrappedBuffer(outBytea);
+      out.start = 0;
+      out.end = outBytea.length;
+    }
+  }
+
+  /**
+   * Returns the input string with its characters in reverse order. The bytes inside each
+   * multi-byte UTF-8 character keep their original order.
+   */
+  @FunctionTemplate(name = "reverse", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class ReverseString implements DrillSimpleFunc {
+
+    @Param  VarCharHolder in;
+    @Output VarCharHolder out;
+    @Workspace ByteBuf buffer;
+
+    public void setup(RecordBatch incoming) {
+    }
+
+    public void eval() {
+      int len = in.end - in.start;
+      byte[] bytea = new byte [len];
+      // Write cursor into bytea, moving from its end towards 0. It must be relative to the
+      // output array (length len), not to in.end, or any input with in.start != 0 overruns it.
+      int index = len;
+      int charlen = 0;
+      int innerindex = 0;
+
+      for (int id = in.start; id < in.end; id += charlen) {
+        // Byte length of the character starting at id.
+        // NOTE(review): presumably the (buffer, index) overload declared elsewhere in StringFunctionUtil - confirm.
+        innerindex = charlen = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.utf8CharLen(in.buffer, id);
+
+        // Copy this character's bytes front to back into the slot ending at index.
+        while (innerindex > 0) {
+          bytea[index - innerindex] = in.buffer.getByte(id + (charlen - innerindex));
+          innerindex--;
+        }
+
+        index -= charlen;
+      }
+      out.buffer = io.netty.buffer.Unpooled.wrappedBuffer(bytea);
+      out.start = 0;
+      out.end = bytea.length;
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/53a89d69/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java
index 3f9ba5e..cb9e749 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java
@@ -23,7 +23,6 @@ import mockit.Injectable;
 import mockit.NonStrictExpectations;
 
 import org.apache.drill.common.config.DrillConfig;
-import org.apache.drill.common.util.TestTools;
 import org.apache.drill.exec.ExecTest;
 import org.apache.drill.exec.expr.fn.FunctionImplementationRegistry;
 import org.apache.drill.exec.memory.TopLevelAllocator;
@@ -37,9 +36,7 @@ import org.apache.drill.exec.rpc.user.UserServer;
 import org.apache.drill.exec.server.DrillbitContext;
 import org.apache.drill.exec.vector.ValueVector;
 import org.apache.drill.exec.vector.VarCharVector;
-import org.junit.Rule;
 import org.junit.Test;
-import org.junit.rules.TestRule;
 
 import com.codahale.metrics.MetricRegistry;
 import com.google.common.base.Charsets;
@@ -243,4 +240,9 @@ public class TestStringFunctions extends ExecTest {
     runTest(bitContext, connection, expected, 
"functions/string/testUpper.json");
   }
 
+  @Test
+  public void testNewStringFuncs(@Injectable final DrillbitContext bitContext, 
@Injectable UserServer.UserClientConnection connection) throws Throwable{
+    Object [] expected = new Object[] {97, 65, -32, "A", "btrim", "Peace Peace 
Peace ", "हकुना मताता हकुना मताता ", 
"katcit", "\u00C3\u00A2pple", "नदम"};
+    runTest(bitContext, connection, expected, 
"functions/string/testStringFuncs.json");
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/53a89d69/exec/java-exec/src/test/resources/functions/string/testStringFuncs.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/functions/string/testStringFuncs.json b/exec/java-exec/src/test/resources/functions/string/testStringFuncs.json
new file mode 100644
index 0000000..4beea3d
--- /dev/null
+++ b/exec/java-exec/src/test/resources/functions/string/testStringFuncs.json
@@ -0,0 +1,44 @@
+{
+    head:{
+        type:"APACHE_DRILL_PHYSICAL",
+        version:"1",
+        generator:{
+            type:"manual"
+        }
+    },
+    graph:[
+        {
+            @id:1,
+            pop:"mock-sub-scan",
+            url: "http://apache.org",
+            entries:[
+               {records: 1, types: [
+                 {name: "varcharcol", type: "VARCHAR", mode: "REQUIRED"},
+                 {name: "nullvarcharcol", type: "VARCHAR", mode: "OPTIONAL"}
+               ]}
+            ]
+        },
+        {
+            @id:2,
+            child: 1,
+            pop:"project",
+            exprs: [
+              { ref : "ref1", expr : " ascii('apache') "},
+              { ref : "ref2", expr : " ascii('Apache') "},
+              { ref : "ref3", expr : " ascii('अपाचे') "},
+              { ref : "ref4", expr : " chr(65) "},
+              { ref : "ref5", expr : " btrim('xyxbtrimyyx', 'xy') "},
+              { ref : "ref6", expr : " repeatstr('Peace ', 3) "},
+              { ref : "ref7", expr : " repeatstr('हकुना मताता ', 2) "},
+              { ref : "ref8", expr : " reverse('tictak') "},
+              { ref : "ref9", expr : " toascii('âpple','ISO-8859-1') "},
+              { ref : "ref10", expr : " reverse('मदन') "}
+           ]
+        },
+        {
+            @id: 3,
+            child: 2,
+            pop: "screen"
+        }
+    ]
+}

Reply via email to