[
https://issues.apache.org/jira/browse/DRILL-6963?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16985067#comment-16985067
]
benj commented on DRILL-6963:
-----------------------------
For the second point (array_agg), pending an eventual official function,
here is a simple implementation that does this (without support for
_DISTINCT_ or _ORDER BY_):
{code:java}
package org.apache.drill.contrib.function;
import io.netty.buffer.DrillBuf;
import org.apache.drill.exec.expr.DrillAggFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.annotations.Workspace;
import org.apache.drill.exec.expr.holders.*;
import javax.inject.Inject;
// If the dataset is too large, run: ALTER SESSION SET `planner.enable_hashagg` = false
public class ArrayAgg {
// STRING NULLABLE //
/**
 * {@code array_agg} over a nullable VARCHAR column.
 *
 * <p>Every non-null input value of a group is appended to a list obtained from the
 * complex output writer; null inputs are skipped. The list is closed in
 * {@link #output()}.
 */
@FunctionTemplate(
    name = "array_agg",
    scope = FunctionScope.POINT_AGGREGATE,
    nulls = NullHandling.INTERNAL)
public static class NullableVarChar_ArrayAgg implements DrillAggFunc {
  /** Current input value; {@code isSet == 0} marks a SQL NULL. */
  @Param NullableVarCharHolder input;
  /** Holds the list writer for the group being aggregated (null until the first add). */
  @Workspace ObjectHolder agg;
  /** Complex writer through which the resulting list is emitted. */
  @Output org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter out;
  /** Scratch buffer used to stage the bytes of each value before it is written. */
  @Inject DrillBuf buffer;

  /** Creates an empty workspace holder. */
  @Override public void setup() {
    agg = new ObjectHolder();
  }

  /** Discards the current list writer by replacing the workspace holder. */
  @Override public void reset() {
    agg = new ObjectHolder();
  }

  /** Appends the current input value to the list, ignoring null inputs. */
  @Override public void add() {
    // The list writer is obtained on the first row, even if that row is null.
    if (agg.obj == null) {
      agg.obj = out.rootAsList();
    }
    // Expressed as a block rather than an early return; the effect is the same.
    if (input.isSet != 0) {
      byte[] inputBytes =
          org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers
              .getStringFromVarCharHolder(input)
              .getBytes(java.nio.charset.StandardCharsets.UTF_8);

      // reallocIfNeeded may return a different (larger) buffer, so its result must be
      // kept; otherwise the bytes could be written into the old, too-small buffer.
      buffer = buffer.reallocIfNeeded(inputBytes.length);
      buffer.setBytes(0, inputBytes);

      org.apache.drill.exec.expr.holders.VarCharHolder rowHolder =
          new org.apache.drill.exec.expr.holders.VarCharHolder();
      rowHolder.start = 0;
      rowHolder.end = inputBytes.length;
      rowHolder.buffer = buffer;

      org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter listWriter =
          (org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) agg.obj;
      listWriter.varChar().write(rowHolder);
    }
  }

  /** Closes the list built by {@link #add()}. */
  @Override public void output() {
    ((org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) agg.obj).endList();
  }
}
// INTEGER NULLABLE //
/**
 * {@code array_agg} over a nullable INT column.
 *
 * <p>Non-null input values are appended, in arrival order, to a list obtained from
 * the complex output writer; null inputs contribute nothing. {@link #output()}
 * closes the list.
 */
@FunctionTemplate(
    name = "array_agg",
    scope = FunctionScope.POINT_AGGREGATE,
    nulls = NullHandling.INTERNAL)
public static class NullableInt_ArrayAgg implements DrillAggFunc {
  /** Current input value; {@code isSet == 0} marks a SQL NULL. */
  @Param NullableIntHolder input;
  /** Carries the list writer between calls (its {@code obj} is null until the first add). */
  @Workspace ObjectHolder agg;
  /** Complex writer through which the resulting list is emitted. */
  @Output org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter out;
  /** Injected buffer; not referenced by any method of this class. */
  @Inject DrillBuf buffer;

  /** Starts with a fresh, empty workspace holder. */
  @Override public void setup() {
    agg = new ObjectHolder();
  }

  /** Replaces the workspace holder, dropping any list writer it held. */
  @Override public void reset() {
    agg = new ObjectHolder();
  }

  /** Adds the current input value to the list unless it is null. */
  @Override public void add() {
    // Obtain the list writer on the first row, whether or not that row is null.
    if (agg.obj == null) {
      agg.obj = out.rootAsList();
    }
    if (input.isSet != 0) {
      org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter elements =
          (org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) agg.obj;
      elements.integer().writeInt(input.value);
    }
  }

  /** Closes the list built by {@link #add()}. */
  @Override public void output() {
    org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter elements =
        (org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) agg.obj;
    elements.endList();
  }
}
// ...
}
{code}
> create/aggregate/work with array
> --------------------------------
>
> Key: DRILL-6963
> URL: https://issues.apache.org/jira/browse/DRILL-6963
> Project: Apache Drill
> Issue Type: Wish
> Components: Functions - Drill
> Reporter: benj
> Priority: Major
>
> * Add the possibility to build array (like : SELECT array[a1,a2,a3...]) -
> ideally work with all types
> * Add a default array_agg (like : SELECT col1, array_agg(col2),
> array_agg(DISTINCT col2) FROM ... GROUP BY col1) ; - ideally work with all
> types
> * Add function/facilities/operator to work with array
--
This message was sent by Atlassian Jira
(v8.3.4#803005)