Repository: drill
Updated Branches:
  refs/heads/master 6ddd5fa73 -> 3186217e5
DRILL-4607: Add a split function that allows to separate string by a delimiter

This closes #506


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/3186217e
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/3186217e
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/3186217e

Branch: refs/heads/master
Commit: 3186217e5abe3c6c2c7e504cdb695567ff577e4c
Parents: 6ddd5fa
Author: Alicia Alvarez <[email protected]>
Authored: Fri Apr 15 11:07:47 2016 -0700
Committer: Aditya Kishore <[email protected]>
Committed: Thu Jun 2 11:01:25 2016 -0700

----------------------------------------------------------------------
 .../exec/expr/fn/impl/StringFunctions.java      | 38 ++++++++++++++++++++
 .../exec/expr/fn/impl/TestStringFunctions.java  | 17 ++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/3186217e/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index 0ce1c4e..41ff55f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -1331,6 +1331,44 @@ public class StringFunctions{
     } // end of eval
   }
 
+  @FunctionTemplate(name = "split", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class Split implements DrillSimpleFunc {
+    @Param VarCharHolder input;
+    @Param VarCharHolder delimiter;
+
+    @Workspace com.google.common.base.Splitter splitter;
+    @Inject DrillBuf buffer;
+
+    @Output org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter writer;
+
+    @Override
+    public void setup() {
+      int len = delimiter.end - delimiter.start;
+      if (len != 1) {
+        throw new IllegalArgumentException("Only single character delimiters are supportted for split()");
+      }
+      char splitChar = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.
+          toStringFromUTF8(delimiter.start, delimiter.end, delimiter.buffer).charAt(0);
+      splitter = com.google.common.base.Splitter.on(splitChar);
+    }
+
+    @Override
+    public void eval() {
+      Iterable<String> tokens = splitter.split(
+          org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer));
+      org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter list = writer.rootAsList();
+      list.startList();
+      for (String token : tokens) {
+        final byte[] strBytes = token.getBytes(com.google.common.base.Charsets.UTF_8);
+        buffer = buffer.reallocIfNeeded(strBytes.length);
+        buffer.setBytes(0, strBytes);
+        list.varChar().writeVarChar(0, strBytes.length, buffer);
+      }
+      list.endList();
+    }
+
+  }
+
   @FunctionTemplate(name = "concatOperator", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class ConcatOperator implements DrillSimpleFunc {
     @Param VarCharHolder left;

http://git-wip-us.apache.org/repos/asf/drill/blob/3186217e/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
index 2efab3b..612408b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -17,10 +17,13 @@
  */
 package org.apache.drill.exec.expr.fn.impl;
 
+import static org.junit.Assert.assertTrue;
+
 import org.apache.drill.BaseTestQuery;
+import org.apache.drill.exec.util.Text;
 import org.junit.Test;
 
-import static org.junit.Assert.assertTrue;
+import com.google.common.collect.ImmutableList;
 
 public class TestStringFunctions extends BaseTestQuery {
 
@@ -232,4 +235,16 @@ public class TestStringFunctions extends BaseTestQuery {
         .build()
         .run();
   }
+
+  @Test
+  public void testSplit() throws Exception {
+    testBuilder()
+        .sqlQuery("select split(n_name, ' ') words from cp.`tpch/nation.parquet` where n_nationkey = 24")
+        .unOrdered()
+        .baselineColumns("words")
+        .baselineValues(ImmutableList.of(new Text("UNITED"), new Text("STATES")))
+        .build()
+        .run();
+  }
+
 }
