This is an automated email from the ASF dual-hosted git repository. mhubail pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 20314d118059da634847e1ddb1b1d744e552e713 Merge: 2521ade cc6143b Author: Ali Alsuliman <ali.al.solai...@gmail.com> AuthorDate: Wed Sep 8 00:19:26 2021 +0300 Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat' Change-Id: I3e700b07781bec8fc5b9eabf15a1249ce2be0272 .../substr-ASTERIXDB-2949.0.query.sqlpp | 25 ++++++++++++++++ .../substr-ASTERIXDB-2949.0.adm | 1 + .../test/resources/runtimets/testsuite_sqlpp.xml | 5 ++++ .../data/std/primitive/UTF8StringPointable.java | 3 +- .../data/std/util/AbstractVarLenObjectBuilder.java | 5 ++-- .../std/primitive/UTF8StringPointableTest.java | 33 ++++++++++++++++++++++ 6 files changed, 69 insertions(+), 3 deletions(-) diff --cc asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index f0b20bf,600dde8..1e142a2 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@@ -9926,8 -9301,13 +9926,13 @@@ </compilation-unit> </test-case> <test-case FilePath="string"> - <compilation-unit name="substring-after-1"> - <output-dir compare="Text">substring-after-1</output-dir> ++ <compilation-unit name="substr-ASTERIXDB-2949"> ++ <output-dir compare="Text">substr-ASTERIXDB-2949</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> - <compilation-unit name="substring-after-2"> - <output-dir compare="Text">substring-after-2</output-dir> + <compilation-unit name="regexp_position_with_flag/offset0/regex_position0_with_flag"> + <output-dir compare="Text">regexp_position_with_flag/offset0/regex_position0_with_flag</output-dir> </compilation-unit> </test-case> <test-case FilePath="string"> diff --cc hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java index 828de18,eff71de..49f6221 --- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java +++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java @@@ -463,12 -368,14 +463,13 @@@ public final class UTF8StringPointable return false; } - builder.reset(out, Math.min(utfLen - byteIdx, (int) (codePointLength * 1.0 * byteIdx / codePointIdx))); - // for byteIdx = 0, this estimate assumes that every char size = 1 byte - int estimateOutBytes = byteIdx == 0 ? charLength : (int) (charLength * 1.0 * byteIdx / chIdx); ++ int estimateOutBytes = byteIdx == 0 ? codePointLength : (int) (codePointLength * 1.0 * byteIdx / codePointIdx); + builder.reset(out, Math.min(utfLen - byteIdx, estimateOutBytes)); - chIdx = 0; - while (byteIdx < utfLen && chIdx < charLength) { - builder.appendChar(src.charAt(src.getMetaDataLength() + byteIdx)); - chIdx++; - byteIdx += src.charSize(src.getMetaDataLength() + byteIdx); + codePointIdx = 0; + while (byteIdx < utfLen && codePointIdx < codePointLength) { + builder.appendCodePoint(src.codePointAt(src.getMetaDataLength() + byteIdx)); + codePointIdx++; + byteIdx += src.codePointSize(src.getMetaDataLength() + byteIdx); } builder.finish(); return true; diff --cc hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java index dcf9a10,ed439cb..f088c7e --- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java +++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java @@@ -306,60 -255,39 +306,93 @@@ public class UTF8StringPointableTest result.set(storage.getByteArray(), 0, storage.getLength()); expected = generateUTF8Pointable("is is it.i am;here. "); assertEquals(0, expected.compareTo(result)); + + // Test Emoji trim + input = STRING_POINTABLE_EMOJI_FAMILY_OF_4; + pattern = "👨👦"; + patternPointable = generateUTF8Pointable(pattern); + codePointSet.clear(); + patternPointable.getCodePoints(codePointSet); + + // Trim left + storage.reset(); + input.trim(builder, storage, true, false, codePointSet); + result.set(storage.getByteArray(), 0, storage.getLength()); + expected = generateUTF8Pointable("\u200D" + "👨👦👦"); + assertEquals(0, expected.compareTo(result)); + + // Trim right + storage.reset(); + input.trim(builder, storage, false, true, codePointSet); + result.set(storage.getByteArray(), 0, storage.getLength()); + expected = generateUTF8Pointable("👨👨👦" + "\u200D"); + assertEquals(0, expected.compareTo(result)); + + // Trim left and right + storage.reset(); + input.trim(builder, storage, true, true, codePointSet); + result.set(storage.getByteArray(), 0, storage.getLength()); + expected = generateUTF8Pointable("\u200D" + "👨👦" + "\u200D"); + assertEquals(0, expected.compareTo(result)); + } + + @Test + public void testReverse() throws Exception { + UTF8StringBuilder builder = new UTF8StringBuilder(); + GrowableArray storage = new GrowableArray(); + UTF8StringPointable result = new UTF8StringPointable(); + UTF8StringPointable input = generateUTF8Pointable(" I'd like to reverse "); + UTF8StringPointable expected = generateUTF8Pointable(" esrever ot ekil d'I "); + + UTF8StringPointable.reverse(input, builder, storage); + result.set(storage.getByteArray(), 0, storage.getLength()); + assertEquals(0, expected.compareTo(result)); + } + + @Test + public void testReverseWithEmoji() throws IOException { + UTF8StringBuilder builder = new UTF8StringBuilder(); + GrowableArray storage = new GrowableArray(); + UTF8StringPointable result = new UTF8StringPointable(); + UTF8StringPointable input = generateUTF8Pointable("\uD83C\uDDE8\uD83C\uDDF3"); // CN flag + UTF8StringPointable expected = generateUTF8Pointable("\uD83C\uDDF3\uD83C\uDDE8"); // NC flag + + UTF8StringPointable.reverse(input, builder, storage); + result.set(storage.getByteArray(), 0, storage.getLength()); + assertEquals(0, expected.compareTo(result)); } + @Test + public void testStringBuilder() throws Exception { + UTF8StringBuilder builder = new UTF8StringBuilder(); + GrowableArray array = new GrowableArray(); + UTF8StringPointable stringPointable = new UTF8StringPointable(); + String writtenString; + int startIdx; + + array.append(STRING_UTF8_MIX.getByteArray(), STRING_UTF8_MIX.getStartOffset(), STRING_UTF8_MIX.getLength()); + String chunk = "ABC"; + String originalString = chunk.repeat(699051); + + // test grow path + startIdx = array.getLength(); + builder.reset(array, 2); + builder.appendString(originalString); + builder.finish(); + stringPointable.set(array.getByteArray(), startIdx, array.getLength()); + writtenString = stringPointable.toString(); + assertEquals(originalString, writtenString); + + // test shrink path + array.reset(); + array.append(STRING_UTF8_MIX.getByteArray(), STRING_UTF8_MIX.getStartOffset(), STRING_UTF8_MIX.getLength()); + startIdx = array.getLength(); + builder.reset(array, 699051); + builder.appendString(chunk); + builder.finish(); + stringPointable.set(array.getByteArray(), startIdx, array.getLength()); + writtenString = stringPointable.toString(); + assertEquals(chunk, writtenString); + } + }