Jianfeng Jia has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/1054
Change subject: Fix ASTERIXDB-1566 ...................................................................... Fix ASTERIXDB-1566 Change the utf8 comparator from RawComparator to UTF8Comparator Change-Id: I187bf1243abf143b3b265fa8098614b9a72c65ad --- A asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.1.ddl.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.2.update.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.3.query.aql A asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-equal-public/string-equal-public.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml M asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java M hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java 8 files changed, 82 insertions(+), 16 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/54/1054/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.1.ddl.aql new file mode 100644 index 0000000..d17ea60 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.1.ddl.aql @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +drop dataverse test if exists; +create dataverse test; + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.2.update.aql new file mode 100644 index 0000000..042f3ce --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.2.update.aql @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.3.query.aql new file mode 100644 index 0000000..4959846 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.3.query.aql @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +use dataverse test; + +"的"="离" +"و"="ن" diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-equal-public/string-equal-public.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-equal-public/string-equal-public.1.adm new file mode 100644 index 0000000..4b095fd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-equal-public/string-equal-public.1.adm @@ -0,0 +1,2 @@ +false +false diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml index 82f5071..f410cbe 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml @@ -5411,6 +5411,11 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="string-equal-public"> + <output-dir compare="Text">string-equal-public</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="string-join1"> <output-dir compare="Text">string-join1</output-dir> </compilation-unit> diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java index feb3228..60b3113 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java @@ -42,15 +42,7 @@ import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.accessors.PointableBinaryComparatorFactory; -import org.apache.hyracks.data.std.primitive.ByteArrayPointable; -import org.apache.hyracks.data.std.primitive.BytePointable; -import org.apache.hyracks.data.std.primitive.DoublePointable; -import org.apache.hyracks.data.std.primitive.FloatPointable; -import org.apache.hyracks.data.std.primitive.IntegerPointable; -import org.apache.hyracks.data.std.primitive.LongPointable; -import org.apache.hyracks.data.std.primitive.RawUTF8StringPointable; -import org.apache.hyracks.data.std.primitive.ShortPointable; -import org.apache.hyracks.data.std.primitive.UTF8StringLowercasePointable; +import org.apache.hyracks.data.std.primitive.*; public class AqlBinaryComparatorFactoryProvider implements IBinaryComparatorFactoryProvider, Serializable { @@ -69,7 +61,7 @@ public static final PointableBinaryComparatorFactory DOUBLE_POINTABLE_INSTANCE = new PointableBinaryComparatorFactory( DoublePointable.FACTORY); public static final PointableBinaryComparatorFactory UTF8STRING_POINTABLE_INSTANCE = new PointableBinaryComparatorFactory( - RawUTF8StringPointable.FACTORY); + UTF8StringPointable.FACTORY); // Equivalent to UTF8STRING_POINTABLE_INSTANCE but all characters are considered lower case to implement case-insensitive comparisons. public static final PointableBinaryComparatorFactory UTF8STRING_LOWERCASE_POINTABLE_INSTANCE = new PointableBinaryComparatorFactory( UTF8StringLowercasePointable.FACTORY); diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java index 8cfe51e..126be5f 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java @@ -25,11 +25,7 @@ import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction; import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory; import org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory; -import org.apache.hyracks.data.std.primitive.DoublePointable; -import org.apache.hyracks.data.std.primitive.FloatPointable; -import org.apache.hyracks.data.std.primitive.IntegerPointable; -import org.apache.hyracks.data.std.primitive.RawUTF8StringPointable; -import org.apache.hyracks.data.std.primitive.UTF8StringLowercasePointable; +import org.apache.hyracks.data.std.primitive.*; public class AqlBinaryHashFunctionFactoryProvider implements IBinaryHashFunctionFactoryProvider, Serializable { @@ -42,7 +38,7 @@ public static final PointableBinaryHashFunctionFactory DOUBLE_POINTABLE_INSTANCE = new PointableBinaryHashFunctionFactory( DoublePointable.FACTORY); public static final PointableBinaryHashFunctionFactory UTF8STRING_POINTABLE_INSTANCE = new PointableBinaryHashFunctionFactory( - RawUTF8StringPointable.FACTORY); + UTF8StringPointable.FACTORY); // Equivalent to UTF8STRING_POINTABLE_INSTANCE but all characters are considered lower case to implement case-insensitive hashing. public static final PointableBinaryHashFunctionFactory UTF8STRING_LOWERCASE_POINTABLE_INSTANCE = new PointableBinaryHashFunctionFactory( UTF8StringLowercasePointable.FACTORY); diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java b/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java index f101ab1..f200384 100644 --- a/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java +++ b/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java @@ -38,11 +38,13 @@ import static org.apache.hyracks.util.string.UTF8StringUtil.normalize; import static org.apache.hyracks.util.string.UTF8StringUtil.rawByteCompareTo; import static org.apache.hyracks.util.string.UTF8StringUtil.hash; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.IOException; +import org.junit.Assert; import org.junit.Test; public class UTF8StringUtilTest { @@ -66,6 +68,14 @@ } @Test + public void testChinese() { + byte[] bufferDe = writeStringToBytes("的"); + byte[] bufferLi = writeStringToBytes("离"); + int ret = compareTo(bufferDe, 0, bufferLi, 0); + assertTrue(ret != 0); + } + + @Test public void testCompareToAndNormolize() throws Exception { testCompare(STRING_UTF8_MIX, STRING_UTF8_MIX, OPTION.STANDARD); testCompare(STRING_UTF8_3, STRING_UTF8_MIX, OPTION.STANDARD); -- To view, visit https://asterix-gerrit.ics.uci.edu/1054 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I187bf1243abf143b3b265fa8098614b9a72c65ad Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Jianfeng Jia <[email protected]>
