[ 
https://issues.apache.org/jira/browse/DRILL-1330?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16765927#comment-16765927
 ] 

benj edited comment on DRILL-1330 at 2/12/19 11:14 AM:
-------------------------------------------------------

Is there any news, progress, or roadmap for this very old topic? It is in fact 
one of the features most sorely missing from Drill.

As said in other topics, not all Drill users are native Java developers, and 
even though it is possible to create custom functions, this one has really 
been missing from Drill since the start.

To help new users, this aggregate could also serve as an example of how to 
implement an aggregate over a non-numeric type.

Please find below my (probably poor) code to do that; it is probably not the 
fastest and may contain some bugs. Maybe a real Java coder could (or should) 
write a standard version and add this aggregate to the list of aggregates that 
we can use in Drill queries.

Finally, please note that (I think) it is not possible to use an ORDER BY 
clause inside an aggregate. That would sometimes be useful (Postgres allows 
it, for example):

 
{code:java}
SELECT string_agg(city, ',' ORDER BY city DESC) FROM ...;
{code}
string_agg :
{code:java}
package org.apache.drill.contrib.function;

import com.google.common.base.Strings;
import io.netty.buffer.DrillBuf;
import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.DrillAggFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.annotations.Workspace;
import org.apache.drill.exec.vector.ValueHolderHelper;
import org.apache.drill.exec.expr.holders.*;
//import org.apache.drill.exec.expr.holders.VarCharHolder;
//import org.apache.drill.exec.expr.holders.NullableVarCharHolder;

import javax.inject.Inject;

public class StringAgg {

@FunctionTemplate(
    name = "string_agg",
    scope = FunctionScope.POINT_AGGREGATE,
    nulls = NullHandling.INTERNAL)
public static class VarChar_MyAggStringAgg implements DrillAggFunc {
  @Param VarCharHolder input;
  @Param(constant = true) VarCharHolder input_separator;
  @Workspace ObjectHolder agg; // If dataset is too large, need : ALTER SESSION 
SET `planner.enable_hashagg` = false
  @Output NullableVarCharHolder out;
  @Inject DrillBuf buffer;

  @Override public void setup() {
    agg = new ObjectHolder();
    agg.obj = new org.apache.drill.exec.expr.fn.impl.DrillByteArray();
  }

  @Override public void reset() {
    agg = new ObjectHolder();
    agg.obj = new org.apache.drill.exec.expr.fn.impl.DrillByteArray();
  }

  @Override public void add() {
    org.apache.drill.exec.expr.fn.impl.DrillByteArray tmp = 
(org.apache.drill.exec.expr.fn.impl.DrillByteArray) agg.obj;
    String s = new String(tmp.getBytes())
      + (tmp.getLength() > 0 ? 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(
 input_separator ) : "")
      + 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(
 input );
    byte[] t = s.getBytes();
    tmp.setBytes(t);
  }

  @Override public void output() {
      out.isSet = 1;
      org.apache.drill.exec.expr.fn.impl.DrillByteArray tmp = 
(org.apache.drill.exec.expr.fn.impl.DrillByteArray) agg.obj;
      buffer = buffer.reallocIfNeeded(tmp.getLength());
      buffer.setBytes(0, tmp.getBytes(), 0, tmp.getLength());
      out.start  = 0;
      out.end    = tmp.getLength();
      out.buffer = buffer;
  }
}

//////////

@FunctionTemplate(
    name = "string_agg",
    scope = FunctionScope.POINT_AGGREGATE,
    nulls = NullHandling.INTERNAL)
public static class NullableVarChar_MyAggStringAgg implements DrillAggFunc {
  @Param NullableVarCharHolder input;
  @Param(constant = true) VarCharHolder input_separator;
  @Workspace ObjectHolder agg;
  @Workspace BitHolder nonNull;
  @Output NullableVarCharHolder out;
  @Inject DrillBuf buffer;

  @Override public void setup() {
    nonNull.value = 0;
    agg = new ObjectHolder();
    agg.obj = new org.apache.drill.exec.expr.fn.impl.DrillByteArray();
  }

  @Override public void reset() {
    nonNull.value = 0;
    agg = new ObjectHolder();
    agg.obj = new org.apache.drill.exec.expr.fn.impl.DrillByteArray();
  }

  @Override public void add() {
    if ( input.isSet != 0 ) {
      nonNull.value = 1;
      org.apache.drill.exec.expr.fn.impl.DrillByteArray tmp = 
(org.apache.drill.exec.expr.fn.impl.DrillByteArray) agg.obj;
      String s = new String(tmp.getBytes())
        + (tmp.getLength() > 0 ? 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(
 input_separator ) : "")
        //+ 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(
 input ); // -- Don't know why can't compile with public static String 
getStringFromVarCharHolder(NullableVarCharHolder varCharHolder)
        + 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start,
 input.end, input.buffer);
      byte[] t = s.getBytes();
      tmp.setBytes(t);
    }
  }

  @Override public void output() {
    if ( nonNull.value == 1 ) {
      out.isSet = 1;
      org.apache.drill.exec.expr.fn.impl.DrillByteArray tmp = 
(org.apache.drill.exec.expr.fn.impl.DrillByteArray) agg.obj;
      buffer = buffer.reallocIfNeeded(tmp.getLength());
      buffer.setBytes(0, tmp.getBytes(), 0, tmp.getLength());
      out.start  = 0;
      out.end    = tmp.getLength();
      out.buffer = buffer;
    }
    else {
      out.isSet = 0;
    }
  }
}


}{code}
 

 


was (Author: benj641):
Is there any news, progress, or roadmap for this very old topic? It is in fact 
one of the features most sorely missing from Drill.

As said in other topics, not all Drill users are native Java developers, and 
even though it is possible to create custom functions, this one has really 
been missing from Drill since the start.

To help new users, this aggregate could also serve as an example of how to 
implement an aggregate over a non-numeric type.

Please find below my (probably poor) code to do that; it is probably not the 
fastest and may contain some bugs. Maybe a real Java coder could (or should) 
write a standard version and add this aggregate to the list of aggregates that 
we can use in Drill queries.

Finally, please note that (I think) it is not possible to use an ORDER BY 
clause inside an aggregate. That would sometimes be useful (Postgres allows 
it, for example):

 
{code:java}
SELECT string_agg(city, ',' ORDER BY city DESC) FROM ...;
{code}
string_agg :
{code:java}
package org.apache.drill.contrib.function;

import com.google.common.base.Strings;
import io.netty.buffer.DrillBuf;
import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.DrillAggFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.annotations.Workspace;
import org.apache.drill.exec.vector.ValueHolderHelper;
import org.apache.drill.exec.expr.holders.*;
//import org.apache.drill.exec.expr.holders.VarCharHolder;
//import org.apache.drill.exec.expr.holders.NullableVarCharHolder;

import javax.inject.Inject;

public class StringAgg {

@FunctionTemplate(
    name = "string_agg",
    scope = FunctionScope.POINT_AGGREGATE,
    nulls = NullHandling.INTERNAL)
public static class VarChar_MyAggStringAgg implements DrillAggFunc {
  @Param VarCharHolder input;
  @Param(constant = true) VarCharHolder input_separator;
  @Workspace ObjectHolder agg; // If dataset is too large, need : ALTER SESSION 
SET `planner.enable_hashagg` = false
  @Output NullableVarCharHolder out;
  @Inject DrillBuf buffer;

  @Override public void setup() {
    agg = new ObjectHolder();
    agg.obj = new org.apache.drill.exec.expr.fn.impl.DrillByteArray();
  }

  @Override public void reset() {
    agg = new ObjectHolder();
    agg.obj = new org.apache.drill.exec.expr.fn.impl.DrillByteArray();
  }

  @Override public void add() {
    org.apache.drill.exec.expr.fn.impl.DrillByteArray tmp = 
(org.apache.drill.exec.expr.fn.impl.DrillByteArray) agg.obj;
    String s = new String(tmp.getBytes())
      + (tmp.getLength() > 0 ? 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(
 input_separator ) : "")
      + 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(
 input );
    byte[] t = s.getBytes();
    tmp.setBytes(t);
  }

  @Override public void output() {
      out.isSet = 1;
      org.apache.drill.exec.expr.fn.impl.DrillByteArray tmp = 
(org.apache.drill.exec.expr.fn.impl.DrillByteArray) agg.obj;
      buffer = buffer.reallocIfNeeded(tmp.getLength());
      buffer.setBytes(0, tmp.getBytes(), 0, tmp.getLength());
      out.start  = 0;
      out.end    = tmp.getLength();
      out.buffer = buffer;
  }
}

//////////

@FunctionTemplate(
    name = "string_agg",
    scope = FunctionScope.POINT_AGGREGATE,
    nulls = NullHandling.INTERNAL)
public static class NullableVarChar_MyAggStringAgg implements DrillAggFunc {
  @Param NullableVarCharHolder input;
  @Param(constant = true) VarCharHolder input_separator;
  @Workspace ObjectHolder agg;
  @Output NullableVarCharHolder out;
  @Inject DrillBuf buffer;

  @Override public void setup() {
    agg = new ObjectHolder();
    agg.obj = new org.apache.drill.exec.expr.fn.impl.DrillByteArray();
  }

  @Override public void reset() {
    agg = new ObjectHolder();
    agg.obj = new org.apache.drill.exec.expr.fn.impl.DrillByteArray();
  }

  @Override public void add() {
    if ( input.isSet != 0 ) {
      org.apache.drill.exec.expr.fn.impl.DrillByteArray tmp = 
(org.apache.drill.exec.expr.fn.impl.DrillByteArray) agg.obj;
      String s = new String(tmp.getBytes())
        + (tmp.getLength() > 0 ? 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(
 input_separator ) : "")
        //+ 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(
 input ); // -- Don't know why can't compile with public static String 
getStringFromVarCharHolder(NullableVarCharHolder varCharHolder)
        + 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start,
 input.end, input.buffer);
      byte[] t = s.getBytes();
      tmp.setBytes(t);
    }
  }

  @Override public void output() {
      out.isSet = 1;
      org.apache.drill.exec.expr.fn.impl.DrillByteArray tmp = 
(org.apache.drill.exec.expr.fn.impl.DrillByteArray) agg.obj;
      buffer = buffer.reallocIfNeeded(tmp.getLength());
      buffer.setBytes(0, tmp.getBytes(), 0, tmp.getLength());
      out.start  = 0;
      out.end    = tmp.getLength();
      out.buffer = buffer;
  }
}


}
{code}
 

 

> String aggregate function - string_agg(expression, delimiter)
> -------------------------------------------------------------
>
>                 Key: DRILL-1330
>                 URL: https://issues.apache.org/jira/browse/DRILL-1330
>             Project: Apache Drill
>          Issue Type: Improvement
>          Components: Functions - Drill
>            Reporter: Yash Sharma
>            Priority: Minor
>             Fix For: Future
>
>




--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to