Thanks Zheng, and thanks for your great support to this list. I took your idea and wrote the following code that worked for me...I'm no Java whiz...so it's probably fairly inefficient. I do get to talk to the Amazon folks from time to time, so I'll definitely mention my interest in upgrading the Hive version. Thanks again.
Matt package com.company.hadoop.hive.udaf; import org.apache.hadoop.hive.ql.exec.UDAF; import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.IntWritable; import java.util.Arrays; public class UDAFGroupConcat extends UDAF{ public static class GroupConcatStringEvaluator implements UDAFEvaluator { private Text mOutput; private boolean mEmpty; public GroupConcatStringEvaluator() { super(); init(); } public void init() { mOutput = null; mEmpty = true; } public boolean iterate(Text o, IntWritable N) { if (o!=null) { if(mEmpty) { mOutput = new Text(N+" "+o.toString()); mEmpty = false; } else { String temp = mOutput.toString() + "\t" + N + " " + o.toString(); String[] split = temp.split("\t"); Arrays.sort(split); String sorted = split[0]; for (int i = 1; i < split.length; i++) { sorted = sorted + "\t" + split[i]; } mOutput.set(sorted); } } return true; } public Text terminatePartial() {return mEmpty ? null : mOutput;} public boolean merge(Text o) { if (o!=null) { if(mEmpty) { mOutput = new Text(o.toString()); mEmpty = false; } else { String temp = mOutput.toString() + "\t" + o.toString(); String[] split = temp.split("\t"); Arrays.sort(split); String sorted = split[0]; for (int i = 1; i < split.length; i++) { sorted = sorted + "\t" + split[i]; } mOutput.set(sorted); } } return true; } public Text terminate() {return mEmpty ? null : mOutput;} } } On Fri, Apr 2, 2010 at 4:11 PM, Matthew Bryan <gou...@gmail.com> wrote: > I'm writing a basic group_concat UDAF for the Amazon version of > Hive....and it's working fine for unordered groupings. But I can't > seem to get an ordered version working (filling an array based on an > IntWritable passed alongside). 
When I move from using Text return type > on terminatePartial() to either Text[] or a State class I start > getting errors: > > FAILED: Error in semantic analysis: > org.apache.hadoop.hive.ql.metadata.HiveException: Cannot recognize > return type class [Lorg.apache.hadoop.io.Text; from public > org.apache.hadoop.io.Text[] > com.company.hadoop.hive.udaf.UDAFGroupConcatN$GroupConcatNStringEvaluator.terminatePartial() > > or > > FAILED: Error in semantic analysis: > org.apache.hadoop.hive.ql.metadata.HiveException: Cannot recognize > return type class > com.company.hadoop.hive.udaf.UDAFGroupConcatN$UDAFGroupConcatNState from public > com.company.hadoop.hive.udaf.UDAFGroupConcatN$UDAFGroupConcatNState > com.company.hadoop.hive.udaf.UDAFGroupConcatN$GroupConcatNStringEvaluator.terminatePartial() > > What limits are there on the return type of > terminatePartial()....shouldn't it just have to match the argument of > merge and nothing more? Keep in mind this is the Amazon version of > Hive (0.4 I think).... > > I put both versions of the UDAF below, ordered and unordered. > > Thanks for your time. 
> > Matt > > > ######### Working Unordered ############ > /*QUERY: select user, event, group_concat(details) from datatable > group by user,event;*/ > > package com.company.hadoop.hive.udaf; > > import org.apache.hadoop.hive.ql.exec.UDAF; > import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; > import org.apache.hadoop.io.Text; > > public class UDAFGroupConcat extends UDAF{ > > public static class GroupConcatStringEvaluator implements > UDAFEvaluator { > private Text mOutput; > private boolean mEmpty; > > public GroupConcatStringEvaluator() { > super(); > init(); > } > > public void init() { > mOutput = null; > mEmpty = true; > } > > public boolean iterate(Text o) { > if (o!=null) { > if(mEmpty) { > mOutput = new Text(o); > mEmpty = false; > } else { > mOutput.set(mOutput.toString()+" > "+o.toString()); > } > } > return true; > } > public Text terminatePartial() {return mEmpty ? null : mOutput;} > public boolean merge(Text o) {return iterate(o);} > public Text terminate() {return mEmpty ? null : mOutput;} > } > } > > ############ Not Working Ordered ############# > /*QUERY: select user, event, group_concatN(details, detail_id) from > datatable group by user,event;*/ > > package com.company.hadoop.hive.udaf; > > import org.apache.hadoop.hive.ql.exec.UDAF; > import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; > import org.apache.hadoop.io.Text; > import org.apache.hadoop.io.IntWritable; > > public class UDAFGroupConcatN extends UDAF{ > > public static class GroupConcatNStringEvaluator implements > UDAFEvaluator { > > private Text[] mArray; > private boolean mEmpty; > > public GroupConcatNStringEvaluator() { > super(); > init(); > } > > public void init() { > mArray = new Text[5]; > mEmpty = true; > } > > public boolean iterate(Text o, IntWritable N) { > if (o!=null&&N!=null) { > mArray[N.get()].set(o.toString()); > mEmpty=false; > } > return true; > } > public Text[] terminatePartial() {return mEmpty ? 
null : mArray;} > public boolean merge(Text[] o) { > if (o!=null) { > for(int i=0; i<=5; i++){ > if(mArray[i].getLength()==0){ > mArray[i].set(o[i].toString()); > } > } > } > return true; > } > > public Text[] terminate() {return mEmpty ? null : mArray;} > } > } >