Github user myui commented on a diff in the pull request:
https://github.com/apache/incubator-hivemall/pull/108#discussion_r138026120
--- Diff: core/src/main/java/hivemall/tools/list/UDAFToOrderedList.java ---
@@ -0,0 +1,535 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.list;
+
+import hivemall.utils.collections.BoundedPriorityQueue;
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.lang.CommandLineUtils;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.IntWritable;
+
+import javax.annotation.Nonnegative;
+import javax.annotation.Nonnull;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.*;
+
+/**
+ * Return list of values sorted by value itself or specific key.
+ */
+@Description(
+ name = "to_ordered_list",
+ value = "_FUNC_(value [, key, const string options]) - Return list
of values sorted by value itself or specific key")
+public class UDAFToOrderedList extends AbstractGenericUDAFResolver {
+
+ /**
+  * Validates the call signature and returns the evaluator for this aggregate.
+  *
+  * <p>Accepted argument lists are {@code (value)}, {@code (value, options)},
+  * {@code (value, key)} and {@code (value, key, options)}, where {@code options}
+  * is a constant string. When sorting by the value itself, the value must be a
+  * primitive type; when sorting by key, the key must be a primitive type.
+  *
+  * @param info parameter information supplied for the function call
+  * @return a new {@link UDAFToOrderedListEvaluator}
+  * @throws SemanticException (as {@link UDFArgumentTypeException}) if the number
+  *         of arguments or the type of the sort target is not accepted
+  */
+ @Override
+ public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
+         throws SemanticException {
+     @SuppressWarnings("deprecation")
+     final TypeInfo[] paramTypes = info.getParameters();
+     final ObjectInspector[] inspectors = info.getParameterObjectInspectors();
+     final int argc = paramTypes.length;
+
+     // (value) or (value, options): the values are ordered by themselves.
+     final boolean sortByValue =
+             argc == 1 || (argc == 2 && HiveUtils.isConstString(inspectors[1]));
+     if (sortByValue) {
+         final TypeInfo valueType = paramTypes[0];
+         if (valueType.getCategory() != ObjectInspector.Category.PRIMITIVE) {
+             throw new UDFArgumentTypeException(0,
+                 "Only primitive type arguments are accepted for value but "
+                         + valueType.getTypeName() + " was passed as the first parameter.");
+         }
+         return new UDAFToOrderedListEvaluator();
+     }
+
+     // (value, key) or (value, key, options): the values are ordered by key.
+     final boolean sortByKey =
+             argc == 2 || (argc == 3 && HiveUtils.isConstString(inspectors[2]));
+     if (!sortByKey) {
+         throw new UDFArgumentTypeException(argc - 1,
+             "Number of arguments must be in [1, 3] including constant string for options: "
+                     + argc);
+     }
+
+     final TypeInfo keyType = paramTypes[1];
+     if (keyType.getCategory() != ObjectInspector.Category.PRIMITIVE) {
+         throw new UDFArgumentTypeException(1,
+             "Only primitive type arguments are accepted for key but "
+                     + keyType.getTypeName() + " was passed as the second parameter.");
+     }
+     return new UDAFToOrderedListEvaluator();
+ }
+
+ public static class UDAFToOrderedListEvaluator extends
GenericUDAFEvaluator {
+
+ private ObjectInspector valueOI;
+ private PrimitiveObjectInspector keyOI;
+
+ private ListObjectInspector valueListOI;
+ private ListObjectInspector keyListOI;
+
+ private StructObjectInspector internalMergeOI;
+
+ private StructField valueListField;
+ private StructField keyListField;
+ private StructField sizeField;
+ private StructField reverseOrderField;
+
+ @Nonnegative
+ private int size;
+ private boolean reverseOrder;
+ private boolean sortByKey;
+
+ /**
+  * Builds the set of command-line style options understood by this function.
+  *
+  * <ul>
+  * <li>{@code -k <arg>}: top-k (positive) or tail-k (negative) selection</li>
+  * <li>{@code -reverse} / {@code --reverse_order}: flag requesting reverse order</li>
+  * </ul>
+  *
+  * @return a freshly created {@link Options} instance holding both options
+  */
+ protected Options getOptions() {
+     // Options#addOption returns the receiver, so the calls can be chained.
+     return new Options()
+         .addOption("k", true, "To top-k (positive) or tail-k (negative) ordered queue")
+         .addOption("reverse", "reverse_order", false,
+             "Sort values by key in a reverse (e.g., descending) order [default: false]");
+ }
+
+ /**
+  * Parses the whitespace-separated option string passed as the constant
+  * options argument.
+  *
+  * <p>In addition to the options returned by {@link #getOptions()}, a
+  * {@code -help} flag is registered. When it is present, no command line is
+  * returned; instead the formatted usage text is delivered to the caller as the
+  * message of a {@link UDFArgumentException}.
+  *
+  * @param optionValue raw option string, e.g. {@code "-k 3 -reverse"}
+  * @return the parsed command line
+  * @throws UDFArgumentException carrying the help text when {@code -help} is
+  *         given; presumably also raised by {@code CommandLineUtils.parseOptions}
+  *         for malformed options (not visible here, confirm against that helper)
+  */
+ @Nonnull
+ protected final CommandLine parseOptions(String optionValue)
+         throws UDFArgumentException {
+     String[] args = optionValue.split("\\s+");
+     Options opts = getOptions();
+     opts.addOption("help", false, "Show function help");
+     CommandLine cl = CommandLineUtils.parseOptions(args, opts);
+
+     if (!cl.hasOption("help")) {
+         return cl;
+     }
+
+     // @Description is declared on the enclosing resolver, not on this evaluator
+     // class, so looking it up on getClass() alone finds nothing and the usage
+     // line would degrade to the bare class name. Check the runtime class first
+     // (a subclass may carry its own description), then fall back to the resolver.
+     Description funcDesc = getClass().getAnnotation(Description.class);
+     if (funcDesc == null) {
+         funcDesc = UDAFToOrderedList.class.getAnnotation(Description.class);
+     }
+
+     // Annotation members can never be null, so no null check on name() is needed.
+     final String cmdLineSyntax = (funcDesc == null)
+             ? getClass().getSimpleName()
+             : funcDesc.value().replace("_FUNC_", funcDesc.name());
+
+     StringWriter sw = new StringWriter();
+     sw.write('\n');
+     PrintWriter pw = new PrintWriter(sw);
+     HelpFormatter formatter = new HelpFormatter();
+     formatter.printHelp(pw, HelpFormatter.DEFAULT_WIDTH, cmdLineSyntax, null, opts,
+         HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, null, true);
+     pw.flush();
+
+     // The help text is reported through the exception message.
+     throw new UDFArgumentException(sw.toString());
+ }
+
+ protected CommandLine processOptions(ObjectInspector[] argOIs)
throws UDFArgumentException {
+ CommandLine cl = null;
+
+ int optionIndex = 1;
+ if (sortByKey) {
+ optionIndex = 2;
+ }
+
+ int k = 0;
+ boolean reverseOrder = false;
+
+ if (argOIs.length >= optionIndex + 1) {
+ String rawArgs =
HiveUtils.getConstString(argOIs[optionIndex]);
+ cl = parseOptions(rawArgs);
+
+ reverseOrder = cl.hasOption("reverse_order");
+
+ if (cl.hasOption("k")) {
+ k = Integer.parseInt(cl.getOptionValue("k"));
+ if (k == 0) {
+ throw new UDFArgumentException("`k` must be
nonzero: " + k);
+ }
+ }
+ }
+
+ this.size = Math.abs(k);
+
+ if ((k > 0 && reverseOrder) || (k < 0 && !reverseOrder) || (k
== 0 && !reverseOrder)) {
+ // reverse top-k, natural tail-k = ascending = natural
order output = reverse order priority queue
+ this.reverseOrder = true;
+ } else { // (k > 0 && !reverseOrder) || (k < 0 &&
reverseOrder) || (k == 0 && reverseOrder)
+ // natural top-k or reverse tail-k = descending = reverse
order output = natural order priority queue
+ this.reverseOrder = false;
--- End diff --
Why does the case `k == 0 && reverseOrder` result in `this.reverseOrder = false`?
---