[
https://issues.apache.org/jira/browse/PHOENIX-4237?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16209674#comment-16209674
]
ASF GitHub Bot commented on PHOENIX-4237:
-----------------------------------------
Github user JamesRTaylor commented on a diff in the pull request:
https://github.com/apache/phoenix/pull/275#discussion_r145475530
--- Diff:
phoenix-core/src/test/java/org/apache/phoenix/expression/function/CollationKeyFunctionTest.java
---
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression.function;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.fail;
+
+import java.util.List;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.function.CollationKeyFunction;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.types.PBoolean;
+import org.apache.phoenix.schema.types.PInteger;
+import org.apache.phoenix.schema.types.PVarchar;
+
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.LiteralExpression;
+
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * "Unit" tests for CollationKeyFunction
+ *
+ * @author snakhoda-sfdc
+ *
+ */
+public class CollationKeyFunctionTest {
+
+ /**
+  * Runs the {@code test} helper for a fixed set of seven Chinese characters
+  * under each of the locale codes zh, zh_TW, zh_TW_STROKE, zh__STROKE and
+  * zh__PINYIN. Each call passes the input character, the locale code and
+  * the expected collation key as a hex string.
+  */
+ @Test
+ public void testChineseCollationKeyBytes() throws Exception {
+
+ // Chinese (China)
+ test("\u963f", "zh", "02eb000000010000");
+ test("\u55c4", "zh", "14ad000000010000");
+ test("\u963e", "zh", "8000963f0000000100010000");
+ test("\u554a", "zh", "02ea000000010000");
+ test("\u4ec8", "zh", "80004ec90000000100010000");
+ test("\u3d9a", "zh", "80003d9b0000000100010000");
+ test("\u9f51", "zh", "1905000000010000");
+
+ // Chinese (Taiwan)
+ test("\u963f", "zh_TW", "063d000000010000");
+ test("\u55c4", "zh_TW", "241e000000010000");
+ test("\u963e", "zh_TW", "8000963f0000000100010000");
+ test("\u554a", "zh_TW", "09c9000000010000");
+ test("\u4ec8", "zh_TW", "181b000000010000");
+ test("\u3d9a", "zh_TW", "80003d9b0000000100010000");
+ test("\u9f51", "zh_TW", "80009f520000000100010000");
+
+ // Chinese (Taiwan, Stroke)
+ test("\u963f", "zh_TW_STROKE", "5450010500");
+ test("\u55c4", "zh_TW_STROKE", "7334010500");
+ test("\u963e", "zh_TW_STROKE", "544f010500");
+ test("\u554a", "zh_TW_STROKE", "62de010500");
+ test("\u4ec8", "zh_TW_STROKE", "46be010500");
+ test("\u3d9a", "zh_TW_STROKE", "a50392010500");
+ test("\u9f51", "zh_TW_STROKE", "8915010500");
+
+ // Chinese (China, Stroke)
+ test("\u963f", "zh__STROKE", "28010500");
+ test("\u55c4", "zh__STROKE", "2a010500");
+ test("\u963e", "zh__STROKE", "7575010500");
+ test("\u554a", "zh__STROKE", "2b010500");
+ test("\u4ec8", "zh__STROKE", "51a1010500");
+ test("\u3d9a", "zh__STROKE", "a50392010500");
+ test("\u9f51", "zh__STROKE", "6935010500");
+
+ // Chinese (China, Pinyin)
+ // NOTE(review): the expected hex values in this group are identical to
+ // the zh__STROKE group above - confirm that this is intended.
+ test("\u963f", "zh__PINYIN", "28010500");
+ test("\u55c4", "zh__PINYIN", "2a010500");
+ test("\u963e", "zh__PINYIN", "7575010500");
+ test("\u554a", "zh__PINYIN", "2b010500");
+ test("\u4ec8", "zh__PINYIN", "51a1010500");
+ test("\u3d9a", "zh__PINYIN", "a50392010500");
+ test("\u9f51", "zh__PINYIN", "6935010500");
+
+ }
+
+ /**
+  * Runs {@code testExpression} for the same input once with
+  * {@code SortOrder.ASC} and once with {@code SortOrder.DESC}, then asserts
+  * that the two boolean results are equal.
+  *
+  * @param inputStr the string passed on as the input of the expression
+  * @param localeIsoCode the locale code passed on to the expression
+  * @param expectedCollationKeyBytesHex the expected key as a hex string,
+  *        passed on unchanged to {@code testExpression}
+  * @throws Exception if either {@code testExpression} call throws
+  */
+ private static void test(String inputStr, String localeIsoCode, String
expectedCollationKeyBytesHex)
+ throws Exception {
+ boolean ret1 = testExpression(inputStr, localeIsoCode,
SortOrder.ASC, expectedCollationKeyBytesHex);
+ boolean ret2 = testExpression(inputStr, localeIsoCode,
SortOrder.DESC, expectedCollationKeyBytesHex);
+ // Only checks that the ascending and descending runs return the same value.
+ assertEquals(ret1, ret2);
+ }
+
+ private static boolean testExpression(String inputStr, String
localeIsoCode, SortOrder sortOrder,
+ String expectedCollationKeyBytesHex) throws Exception {
+ LiteralExpression inputStrLiteral, localeIsoCodeLiteral,
upperCaseBooleanLiteral, strengthLiteral,
+ decompositionLiteral;
+ inputStrLiteral = LiteralExpression.newConstant(inputStr,
PVarchar.INSTANCE, sortOrder);
+ localeIsoCodeLiteral =
LiteralExpression.newConstant(localeIsoCode, PVarchar.INSTANCE, sortOrder);
+ upperCaseBooleanLiteral =
LiteralExpression.newConstant(Boolean.FALSE, PBoolean.INSTANCE, sortOrder);
+ strengthLiteral = LiteralExpression.newConstant(null,
PInteger.INSTANCE, sortOrder);
+ decompositionLiteral = LiteralExpression.newConstant(null,
PInteger.INSTANCE, sortOrder);
--- End diff --
Please add tests around setting strength and decomposition.
> Allow sorting on (Java) collation keys for non-English locales
> --------------------------------------------------------------
>
> Key: PHOENIX-4237
> URL: https://issues.apache.org/jira/browse/PHOENIX-4237
> Project: Phoenix
> Issue Type: Improvement
> Reporter: Shehzaad Nakhoda
> Fix For: 4.12.0
>
>
> Strings stored via Phoenix can be composed from a subset of the entire set of
> Unicode characters. The natural sort order for strings for different
> languages often differs from the order dictated by the binary representation
> of the characters of these strings. Java provides the idea of a Collator
> which, given an input string and a (language) locale, can generate a Collation
> Key that can then be used to compare strings in that natural order.
> Salesforce has recently open-sourced grammaticus. IBM open-sourced ICU4J
> some time ago. These technologies can be combined to provide a robust new
> Phoenix function that can be used in an ORDER BY clause to sort strings
> according to the user's locale.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)