Github user JamesRTaylor commented on a diff in the pull request: https://github.com/apache/phoenix/pull/275#discussion_r145233589 --- Diff: phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java --- @@ -0,0 +1,221 @@ +package org.apache.phoenix.expression.function; + +import java.sql.SQLException; +import java.text.Collator; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; + +import org.apache.commons.lang.BooleanUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.phoenix.expression.Expression; +import org.apache.phoenix.parse.FunctionParseNode; +import org.apache.phoenix.schema.tuple.Tuple; +import org.apache.phoenix.schema.types.PBoolean; +import org.apache.phoenix.schema.types.PDataType; +import org.apache.phoenix.schema.types.PInteger; +import org.apache.phoenix.schema.types.PIntegerArray; +import org.apache.phoenix.schema.types.PUnsignedIntArray; +import org.apache.phoenix.schema.types.PVarbinary; +import org.apache.phoenix.schema.types.PVarchar; +import org.apache.phoenix.schema.types.PhoenixArray; +import org.apache.phoenix.util.VarBinaryFormatter; + +import com.force.db.i18n.LinguisticSort; +import com.force.i18n.LocaleUtils; + +import com.ibm.icu.impl.jdkadapter.CollatorICU; +import com.ibm.icu.util.ULocale; + +/** + * A Phoenix Function that calculates a collation key for an input string based + * on a caller-provided locale and collator strength and decomposition settings. + * + * It uses the open-source grammaticus and i18n packages to obtain the collators + * it needs. + * + * @author snakhoda + * + */ +@FunctionParseNode.BuiltInFunction(name = CollationKeyFunction.NAME, args = { + // input string + @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }), + // ISO Code for Locale + @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }, isConstant = true), + // whether to use special upper case collator + @FunctionParseNode.Argument(allowedTypes = { PBoolean.class }, defaultValue = "false", isConstant = true), + // collator strength + @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true), + // collator decomposition + @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true) }) +public class CollationKeyFunction extends ScalarFunction { + + private static final Log LOG = LogFactory.getLog(CollationKeyFunction.class); + + public static final String NAME = "COLLKEY"; --- End diff -- Is there a convention in other RDBMS for the name of this function? Is it spelled out COLLATION_KEY or abbreviated as you've done? If abbreviated, then IMHO, it'd be better to name the class and unit tests CollKeyFunction, CollKeyFunctionIT, etc. to make it easier to find (i.e. based on the function name). That's our typical convention.
---