[asterixdb] 02/03: Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat'

2021-09-08 Thread mhubail
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 20314d118059da634847e1ddb1b1d744e552e713
Merge: 2521ade cc6143b
Author: Ali Alsuliman 
AuthorDate: Wed Sep 8 00:19:26 2021 +0300

Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat'

Change-Id: I3e700b07781bec8fc5b9eabf15a1249ce2be0272

 .../substr-ASTERIXDB-2949.0.query.sqlpp| 25 
 .../substr-ASTERIXDB-2949.0.adm|  1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  5 
 .../data/std/primitive/UTF8StringPointable.java|  3 +-
 .../data/std/util/AbstractVarLenObjectBuilder.java |  5 ++--
 .../std/primitive/UTF8StringPointableTest.java | 33 ++
 6 files changed, 69 insertions(+), 3 deletions(-)

diff --cc asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index f0b20bf,600dde8..1e142a2
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@@ -9926,8 -9301,13 +9926,13 @@@

  
  
 -  
 -substring-after-1
++  
++substr-ASTERIXDB-2949
+   
+ 
+ 
 -  
 -substring-after-2
 +  
 +regexp_position_with_flag/offset0/regex_position0_with_flag

  
  
diff --cc 
hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 828de18,eff71de..49f6221
--- 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@@ -463,12 -368,14 +463,13 @@@ public final class UTF8StringPointable 
  return false;
  }
  
- builder.reset(out, Math.min(utfLen - byteIdx, (int) (codePointLength 
* 1.0 * byteIdx / codePointIdx)));
 -// for byteIdx = 0, this estimate assumes that every char size = 1 
byte
 -int estimateOutBytes = byteIdx == 0 ? charLength : (int) (charLength 
* 1.0 * byteIdx / chIdx);
++int estimateOutBytes = byteIdx == 0 ? codePointLength : (int) 
(codePointLength * 1.0 * byteIdx / codePointIdx);
+ builder.reset(out, Math.min(utfLen - byteIdx, estimateOutBytes));
 -chIdx = 0;
 -while (byteIdx < utfLen && chIdx < charLength) {
 -builder.appendChar(src.charAt(src.getMetaDataLength() + byteIdx));
 -chIdx++;
 -byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
 +codePointIdx = 0;
 +while (byteIdx < utfLen && codePointIdx < codePointLength) {
 +builder.appendCodePoint(src.codePointAt(src.getMetaDataLength() + 
byteIdx));
 +codePointIdx++;
 +byteIdx += src.codePointSize(src.getMetaDataLength() + byteIdx);
  }
  builder.finish();
  return true;
diff --cc 
hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
index dcf9a10,ed439cb..f088c7e
--- 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@@ -306,60 -255,39 +306,93 @@@ public class UTF8StringPointableTest 
  result.set(storage.getByteArray(), 0, storage.getLength());
  expected = generateUTF8Pointable("is is it.i am;here.  ");
  assertEquals(0, expected.compareTo(result));
 +
 +// Test Emoji trim
 +input = STRING_POINTABLE_EMOJI_FAMILY_OF_4;
 +pattern = "";
 +patternPointable = generateUTF8Pointable(pattern);
 +codePointSet.clear();
 +patternPointable.getCodePoints(codePointSet);
 +
 +// Trim left
 +storage.reset();
 +input.trim(builder, storage, true, false, codePointSet);
 +result.set(storage.getByteArray(), 0, storage.getLength());
 +expected = generateUTF8Pointable("\u200D" + "‍‍");
 +assertEquals(0, expected.compareTo(result));
 +
 +// Trim right
 +storage.reset();
 +input.trim(builder, storage, false, true, codePointSet);
 +result.set(storage.getByteArray(), 0, storage.getLength());
 +expected = generateUTF8Pointable("‍‍" + "\u200D");
 +assertEquals(0, expected.compareTo(result));
 +
 +// Trim left and right
 +storage.reset();
 +input.trim(builder, storage, true, true, codePointSet);
 +result.set(storage.getByteArray(), 0, storage.getLength());
 +expected = 

[asterixdb] branch master updated (c84b073 -> d523ccc6)

2021-09-08 Thread mhubail
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git.


from c84b073  [ASTERIXDB-2933][COMP][EXT] Pushdowns Part3: Enable pushdown
 new cc6143b  [ASTERIXDB-2949][RUN][FUN] SUBSTR function produces malformed string
 new 20314d1  Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat'
 new d523ccc6 Merge branch 'cheshire-cat' into master

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../substr-ASTERIXDB-2949.0.query.sqlpp}   | 13 ++---
 .../substr-ASTERIXDB-2949.0.adm|  1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  5 
 .../data/std/primitive/UTF8StringPointable.java|  3 +-
 .../data/std/util/AbstractVarLenObjectBuilder.java |  5 ++--
 .../std/primitive/UTF8StringPointableTest.java | 33 ++
 6 files changed, 46 insertions(+), 14 deletions(-)
 copy 
asterixdb/asterix-app/src/test/resources/{optimizerts/queries_sqlpp/unnest-to-join_01.sqlpp
 => 
runtimets/queries_sqlpp/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.query.sqlpp}
 (74%)
 create mode 100644 
asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.adm


[asterixdb] 03/03: Merge branch 'cheshire-cat' into master

2021-09-08 Thread mhubail
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit d523ccc6e7a0998d6452bf1650d9d9bf31f463a3
Merge: c84b073 20314d1
Author: Murtadha Hubail 
AuthorDate: Wed Sep 8 19:59:11 2021 +0300

Merge branch 'cheshire-cat' into master

Change-Id: I762bdfd55c4aa54ea507facd8489f78ce3d73785

 .../substr-ASTERIXDB-2949.0.query.sqlpp| 25 
 .../substr-ASTERIXDB-2949.0.adm|  1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  5 
 .../data/std/primitive/UTF8StringPointable.java|  3 +-
 .../data/std/util/AbstractVarLenObjectBuilder.java |  5 ++--
 .../std/primitive/UTF8StringPointableTest.java | 33 ++
 6 files changed, 69 insertions(+), 3 deletions(-)



[asterixdb] 01/03: [ASTERIXDB-2949][RUN][FUN] SUBSTR function produces malformed string

2021-09-08 Thread mhubail
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit cc6143b4ef5bb3f505478ada2bd95350a0758f6a
Author: Ali Alsuliman 
AuthorDate: Tue Aug 17 18:00:11 2021 +0300

[ASTERIXDB-2949][RUN][FUN] SUBSTR function produces malformed string

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Fix UTF8StringBuilder grow logic

UTF8StringBuilder initially takes an estimated length of the
string to be written and reserves space at the beginning
of the buffer to later store the length of the data written.
When the data actually written turns out to be longer than the
estimate, so that more bytes are needed to store the length,
the string content needs to be shifted.

This patch fixes the starting offset of the data to be shifted.
Also, the estimated length calculation in the substring method of
UTF8StringPointable is modified to handle a start offset of 0, as in
SUBSTR(input_string, 0, num_chars_to_substring).

Change-Id: If36253ff884a9c19eaa130c4e5e926f2dd9eea1d
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12864
Integration-Tests: Jenkins 
Tested-by: Jenkins 
Reviewed-by: Ali Alsuliman 
Reviewed-by: Ian Maxon 
---
 .../substr-ASTERIXDB-2949.0.query.sqlpp| 25 
 .../substr-ASTERIXDB-2949.0.adm|  1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  5 
 .../data/std/primitive/UTF8StringPointable.java|  4 ++-
 .../data/std/util/AbstractVarLenObjectBuilder.java |  5 ++--
 .../std/primitive/UTF8StringPointableTest.java | 33 ++
 6 files changed, 70 insertions(+), 3 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.query.sqlpp
new file mode 100644
index 000..22105a4
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description: Test the issue described in ASTERIXDB-2949
+ * Success: Yes
+ */
+
+
+SELECT 
SUBSTR("•\tABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n•\tabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\tABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
 0, 1000) AS s;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.adm
new file mode 100644
index 000..a36b551
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.adm
@@ -0,0 +1 @@
+{ "s": 
"•\tABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n•\tabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\tABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
 }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 8d06f71..600dde8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -9296,6 +9296,11 @@
   
 
 
+  
+substr-ASTERIXDB-2949
+  
+
+
   
 substring-after-1
   
diff --git 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index