[
https://issues.apache.org/jira/browse/DRILL-5193?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Muhammad Gelbana updated DRILL-5193:
------------------------------------
Description:
I defined the following UDF
{code:title=SplitPartFunc.java|borderStyle=solid}
import javax.inject.Inject;
import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.holders.IntHolder;
import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
import org.apache.drill.exec.expr.holders.VarCharHolder;
import io.netty.buffer.DrillBuf;
@FunctionTemplate(name = "split_string", scope =
FunctionTemplate.FunctionScope.SIMPLE, nulls =
FunctionTemplate.NullHandling.NULL_IF_NULL)
public class SplitPartFunc implements DrillSimpleFunc {
@Param
VarCharHolder input;
@Param(constant = true)
VarCharHolder delimiter;
@Param(constant = true)
IntHolder field;
@Output
NullableVarCharHolder out;
@Inject
DrillBuf buffer;
public void setup() {
}
public void eval() {
String stringValue =
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start,
input.end, input.buffer);
out.buffer = buffer; //If I return before this statement, a NPE is
thrown :(
if(stringValue == null){
return;
}
int fieldValue = field.value;
if(fieldValue <= 0){
return;
}
String delimiterValue =
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(delimiter.start,
delimiter.end, delimiter.buffer);
if(delimiterValue == null){
return;
}
String[] splittedInput = stringValue.split(delimiterValue);
if(splittedInput.length < fieldValue){
return;
}
// put the output value in the out buffer
String outputValue = splittedInput[fieldValue - 1];
out.start = 0;
out.end = outputValue.getBytes().length;
buffer.setBytes(0, outputValue.getBytes());
out.isSet = 1;
}
}
{code}
If I run the following query on the sample employee.json file (or on a
Parquet file, after adjusting the table and column names)
{code:title=SQL Query|borderStyle=solid}SELECT full_name,
split_string(full_name, ' ', 4), split_string('Whatever', ' ', 4) FROM
cp.employee.json LIMIT 1{code}
I get the following result
!https://i.stack.imgur.com/L8uQW.png!
Shouldn't I be getting NULLs for the last 2 columns?
was:
I defined the following UDF
{code:title=SplitPartFunc.java|borderStyle=solid}
import javax.inject.Inject;
import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.holders.IntHolder;
import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
import org.apache.drill.exec.expr.holders.VarCharHolder;
import io.netty.buffer.DrillBuf;
@FunctionTemplate(name = "split_string", scope =
FunctionTemplate.FunctionScope.SIMPLE, nulls =
FunctionTemplate.NullHandling.NULL_IF_NULL)
public class SplitPartFunc implements DrillSimpleFunc {
@Param
VarCharHolder input;
@Param(constant = true)
VarCharHolder delimiter;
@Param(constant = true)
IntHolder field;
@Output
NullableVarCharHolder out;
@Inject
DrillBuf buffer;
public void setup() {
}
public void eval() {
String stringValue =
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start,
input.end, input.buffer);
out.buffer = buffer; //If I return before this statement, a NPE is
thrown :(
if(stringValue == null){
return;
}
int fieldValue = field.value;
if(fieldValue <= 0){
return;
}
String delimiterValue =
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(delimiter.start,
delimiter.end, delimiter.buffer);
if(delimiterValue == null){
return;
}
String[] splittedInput = stringValue.split(delimiterValue);
if(splittedInput.length < fieldValue){
return;
}
// put the output value in the out buffer
String outputValue = splittedInput[fieldValue - 1];
out.start = 0;
out.end = outputValue.getBytes().length;
buffer.setBytes(0, outputValue.getBytes());
out.isSet = 1;
}
}
{code}
If I run the following query on the sample employee.json file (or on a
Parquet file, after adjusting the table and column names)
{code:title=SQL Query|borderStyle=solid}SELECT full_name,
split_string(full_name, ' ', 4), split_string('Whatever', ' ', 4) FROM
cp.employee.json LIMIT 1{code}
I get the following result
!https://i.stack.imgur.com/L8uQW.png!
Shouldn't I be getting the column value and NULL for the other 2 columns?
> UDF returns NULL as expected only if the input is a literal
> -----------------------------------------------------------
>
> Key: DRILL-5193
> URL: https://issues.apache.org/jira/browse/DRILL-5193
> Project: Apache Drill
> Issue Type: Bug
> Components: Functions - Drill
> Affects Versions: 1.9.0
> Reporter: Muhammad Gelbana
>
> I defined the following UDF
> {code:title=SplitPartFunc.java|borderStyle=solid}
> import javax.inject.Inject;
> import org.apache.drill.exec.expr.DrillSimpleFunc;
> import org.apache.drill.exec.expr.annotations.FunctionTemplate;
> import org.apache.drill.exec.expr.annotations.Output;
> import org.apache.drill.exec.expr.annotations.Param;
> import org.apache.drill.exec.expr.holders.IntHolder;
> import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
> import org.apache.drill.exec.expr.holders.VarCharHolder;
> import io.netty.buffer.DrillBuf;
> @FunctionTemplate(name = "split_string", scope =
> FunctionTemplate.FunctionScope.SIMPLE, nulls =
> FunctionTemplate.NullHandling.NULL_IF_NULL)
> public class SplitPartFunc implements DrillSimpleFunc {
> @Param
> VarCharHolder input;
> @Param(constant = true)
> VarCharHolder delimiter;
> @Param(constant = true)
> IntHolder field;
> @Output
> NullableVarCharHolder out;
> @Inject
> DrillBuf buffer;
> public void setup() {
> }
> public void eval() {
> String stringValue =
> org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start,
> input.end, input.buffer);
> out.buffer = buffer; //If I return before this statement, a NPE is
> thrown :(
> if(stringValue == null){
> return;
> }
> int fieldValue = field.value;
> if(fieldValue <= 0){
> return;
> }
> String delimiterValue =
> org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(delimiter.start,
> delimiter.end, delimiter.buffer);
> if(delimiterValue == null){
> return;
> }
> String[] splittedInput = stringValue.split(delimiterValue);
> if(splittedInput.length < fieldValue){
> return;
> }
> // put the output value in the out buffer
> String outputValue = splittedInput[fieldValue - 1];
> out.start = 0;
> out.end = outputValue.getBytes().length;
> buffer.setBytes(0, outputValue.getBytes());
> out.isSet = 1;
> }
> }
> {code}
> If I run the following query on the sample employees.json file (or actually a
> parquet, after modifying the table and columns names)
> {code:title=SQL Query|borderStyle=solid}SELECT full_name,
> split_string(full_name, ' ', 4), split_string('Whatever', ' ', 4) FROM
> cp.employee.json LIMIT 1{code}
> I get the following result
> !https://i.stack.imgur.com/L8uQW.png!
> Shouldn't I be getting NULLs for the last 2 columns ?
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)