This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit d072c591ffb21710a357aba3374bcf60aab09f33
Author: Hussain Towaileb <[email protected]>
AuthorDate: Wed Nov 29 02:32:04 2023 +0300

    [ASTERIXDB-3321][FUN]: Return null and warn for string functions for 
invalid unicode sequence
    
    Change-Id: I67c04de2144f740fd63e85ecbd4efded544db62c
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17986
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Murtadha Hubail <[email protected]>
---
 .../string/invalid-unicode/test.000.query.sqlpp    | 34 ++++++++++++++++++++++
 .../results/string/invalid-unicode/result.000.adm  |  1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   | 16 ++++++++++
 .../asterix/common/exceptions/ErrorCode.java       |  1 +
 .../src/main/resources/asx_errormsg/en.properties  |  1 +
 .../asterix/om/exceptions/ExceptionUtil.java       |  8 +++++
 .../functions/AbstractBinaryStringEval.java        |  4 +++
 .../functions/AbstractUnaryStringStringEval.java   |  4 +++
 .../functions/StringLengthDescriptor.java          |  4 +++
 .../functions/StringToCodePointDescriptor.java     |  4 +++
 .../data/std/primitive/UTF8StringPointable.java    | 19 ++++++------
 .../util/exceptions/UTF8EncodingException.java     | 25 ++++++++++++++++
 .../apache/hyracks/util/string/UTF8StringUtil.java |  8 +++--
 13 files changed, 117 insertions(+), 12 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/invalid-unicode/test.000.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/invalid-unicode/test.000.query.sqlpp
new file mode 100644
index 0000000000..a533d7efd2
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/invalid-unicode/test.000.query.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// param max-warnings:json=1000
+
+[
+ string_length("\uDEAD x \uDEAD"),
+ string_to_codepoint("\uDEAD x \uDEAD"),
+ trim("\uDEAD x \uDEAD"),
+ ltrim("\uDEAD x \uDEAD"),
+ rtrim("\uDEAD x \uDEAD"),
+ trim("\uDEAD x \uDEAD", "x"),
+ ltrim("\uDEAD x \uDEAD", "x"),
+ rtrim("\uDEAD x \uDEAD", "x"),
+ reverse("\uDEAD x \uDEAD"),
+ position("\uDEAD x \uDEAD", "x"),
+ position1("\uDEAD x \uDEAD", "x")
+];
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/invalid-unicode/result.000.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/invalid-unicode/result.000.adm
new file mode 100644
index 0000000000..a9fc274160
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/invalid-unicode/result.000.adm
@@ -0,0 +1 @@
+[ null, null, null, null, null, null, null, null, null, null, null ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index dc31f7e14a..9a3abae1d0 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -11341,6 +11341,22 @@
         <output-dir compare="Text">substring-after-5</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="string" check-warnings="true">
+      <compilation-unit name="invalid-unicode">
+        <output-dir compare="Text">invalid-unicode</output-dir>
+        <expected-warn>ASX0060: Function 'string-length' failed to evaluate 
because: Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'string-to-codepoint' failed to 
evaluate because: Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'trim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'trim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'rtrim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'rtrim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'ltrim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'ltrim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'reverse' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'position' failed to evaluate 
because: Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>ASX0060: Function 'position1' failed to evaluate 
because: Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="subquery">
     <test-case FilePath="subquery">
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index b0826e81f5..491034353e 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -87,6 +87,7 @@ public enum ErrorCode implements IError {
     PARQUET_CONTAINS_OVERFLOWED_BIGINT(57),
     UNEXPECTED_ERROR_ENCOUNTERED(58),
     INVALID_PARQUET_FILE(59),
+    FUNCTION_EVALUATION_FAILED(60),
 
     UNSUPPORTED_JRE(100),
 
diff --git 
a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties 
b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index 0bf523a82d..074245c95a 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -94,6 +94,7 @@
 57 = Parquet file(s) contain unsigned integer that is larger than the '%1$s' 
range
 58 = Error encountered: %1$s
 59 = Invalid Parquet file: %1$s. Reason: %2$s
+60 = Function '%1$s' failed to evaluate because: %2$s
 
 100 = Unsupported JRE: %1$s
 
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/exceptions/ExceptionUtil.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/exceptions/ExceptionUtil.java
index ccb3a8d9cb..928a6b5195 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/exceptions/ExceptionUtil.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/exceptions/ExceptionUtil.java
@@ -143,6 +143,14 @@ public final class ExceptionUtil {
         warnInvalidValue(ctx, srcLoc, fid, argIdx, argValue, 
ErrorCode.NEGATIVE_VALUE);
     }
 
+    public static void warnStringFunctionFailed(IEvaluatorContext ctx, 
SourceLocation srcLoc, FunctionIdentifier fid,
+            String errMsg) {
+        if (ctx.getWarningCollector().shouldWarn()) {
+            ctx.getWarningCollector()
+                    .warn(Warning.of(srcLoc, 
ErrorCode.FUNCTION_EVALUATION_FAILED, fid.getName(), errMsg));
+        }
+    }
+
     private static void warnInvalidValue(IEvaluatorContext ctx, SourceLocation 
srcLoc, FunctionIdentifier fid,
             int argIdx, double argValue, ErrorCode errorCode) {
         IWarningCollector warningCollector = ctx.getWarningCollector();
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
index 2fc8654da6..9de704ba87 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
@@ -35,6 +35,7 @@ import 
org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.primitive.VoidPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 
 public abstract class AbstractBinaryStringEval implements IScalarEvaluator {
 
@@ -106,6 +107,9 @@ public abstract class AbstractBinaryStringEval implements 
IScalarEvaluator {
         // The actual processing.
         try {
             process(leftStringPointable, rightStringPointable, 
resultPointable);
+        } catch (UTF8EncodingException ex) {
+            PointableHelper.setNull(resultPointable);
+            ExceptionUtil.warnStringFunctionFailed(ctx, sourceLoc, funcID, 
ex.getMessage());
         } catch (IOException e) {
             throw HyracksDataException.create(e);
         }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java
index 5efe529e72..7a60aae3f3 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java
@@ -37,6 +37,7 @@ import 
org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.data.std.util.GrowableArray;
 import org.apache.hyracks.data.std.util.UTF8StringBuilder;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 
 abstract class AbstractUnaryStringStringEval implements IScalarEvaluator {
 
@@ -84,6 +85,9 @@ abstract class AbstractUnaryStringStringEval implements 
IScalarEvaluator {
         try {
             process(stringPtr, resultPointable);
             writeResult(resultPointable);
+        } catch (UTF8EncodingException ex) {
+            PointableHelper.setNull(resultPointable);
+            ExceptionUtil.warnStringFunctionFailed(ctx, sourceLoc, funcID, 
ex.getMessage());
         } catch (IOException e) {
             throw HyracksDataException.create(e);
         }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLengthDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLengthDescriptor.java
index 47caf14ea1..d9c9ecb66a 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLengthDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLengthDescriptor.java
@@ -41,6 +41,7 @@ import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.primitive.VoidPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 import org.apache.hyracks.util.string.UTF8StringUtil;
 
 @MissingNullInOutFunction
@@ -89,6 +90,9 @@ public class StringLengthDescriptor extends 
AbstractScalarFunctionDynamicDescrip
                             result.setValue(len);
                             int64Serde.serialize(result, out);
                             resultPointable.set(resultStorage);
+                        } catch (UTF8EncodingException ex) {
+                            PointableHelper.setNull(resultPointable);
+                            ExceptionUtil.warnStringFunctionFailed(ctx, 
sourceLoc, getIdentifier(), ex.getMessage());
                         } catch (IOException e1) {
                             throw HyracksDataException.create(e1);
                         }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringToCodePointDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringToCodePointDescriptor.java
index 2f6a2231b2..d4f5368c9d 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringToCodePointDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringToCodePointDescriptor.java
@@ -43,6 +43,7 @@ import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.primitive.VoidPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 import org.apache.hyracks.util.string.UTF8StringUtil;
 
 @MissingNullInOutFunction
@@ -109,6 +110,9 @@ public class StringToCodePointDescriptor extends 
AbstractScalarFunctionDynamicDe
                             }
                             listBuilder.write(out, true);
                             result.set(resultStorage);
+                        } catch (UTF8EncodingException ex) {
+                            PointableHelper.setNull(result);
+                            ExceptionUtil.warnStringFunctionFailed(ctx, 
sourceLoc, getIdentifier(), ex.getMessage());
                         } catch (IOException e1) {
                             throw HyracksDataException.create(e1);
                         }
diff --git 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 49f6221e4f..4acc8234b4 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -20,6 +20,7 @@ package org.apache.hyracks.data.std.primitive;
 
 import static 
org.apache.hyracks.util.string.UTF8StringUtil.HIGH_SURROGATE_WITHOUT_LOW_SURROGATE;
 import static 
org.apache.hyracks.util.string.UTF8StringUtil.LOW_SURROGATE_WITHOUT_HIGH_SURROGATE;
+import static org.apache.hyracks.util.string.UTF8StringUtil.MALFORMED_BYTES;
 
 import java.io.IOException;
 import java.nio.charset.Charset;
@@ -35,6 +36,7 @@ import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.api.IPointableFactory;
 import org.apache.hyracks.data.std.util.GrowableArray;
 import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 import org.apache.hyracks.util.string.UTF8StringUtil;
 
 import com.fasterxml.jackson.databind.JsonNode;
@@ -136,7 +138,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
         }
 
         if (byteIdx != utf8Length) {
-            throw new IllegalArgumentException("Decoding error: malformed 
bytes");
+            throw new UTF8EncodingException(MALFORMED_BYTES);
         }
     }
 
@@ -317,7 +319,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                     return startMatchPos;
                 } else {
                     if (prevHighSurrogate) {
-                        throw new 
IllegalArgumentException(HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
+                        throw new 
UTF8EncodingException(HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
                     }
                     return codePointCount;
                 }
@@ -333,7 +335,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                         codePointCount++;
                         prevHighSurrogate = false;
                     } else {
-                        throw new 
IllegalArgumentException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
+                        throw new 
UTF8EncodingException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
                     }
                 } else {
                     codePointCount++;
@@ -678,9 +680,9 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
             endIndex = startIndex;
             int cursorIndex = startIndex;
             while (cursorIndex < srcUtfLen) {
-                int codePioint = srcPtr.codePointAt(srcStart + cursorIndex);
+                int codePoint = srcPtr.codePointAt(srcStart + cursorIndex);
                 cursorIndex += srcPtr.codePointSize(srcStart + cursorIndex);
-                if (!codePointSet.contains(codePioint)) {
+                if (!codePointSet.contains(codePoint)) {
                     endIndex = cursorIndex;
                 }
             }
@@ -739,9 +741,8 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                         cursorIndex--;
                         if (UTF8StringUtil.isCharStart(srcPtr.bytes, 
cursorIndex)) {
                             ch = UTF8StringUtil.charAt(srcPtr.bytes, 
cursorIndex);
-                            if (Character.isHighSurrogate(ch) == false) {
-                                throw new IllegalArgumentException(
-                                        "Decoding Error: no corresponding high 
surrogate found for the following low surrogate");
+                            if (!Character.isHighSurrogate(ch)) {
+                                throw new 
UTF8EncodingException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
                             }
 
                             charSize += UTF8StringUtil.charSize(srcPtr.bytes, 
cursorIndex);
@@ -749,7 +750,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                         }
                     }
                 } else if (Character.isHighSurrogate(ch)) {
-                    throw new IllegalArgumentException("Decoding Error: get a 
high surrogate without low surrogate");
+                    throw new 
UTF8EncodingException(HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
                 }
 
                 builder.appendUtf8StringPointable(srcPtr, cursorIndex, 
charSize);
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/exceptions/UTF8EncodingException.java
 
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/exceptions/UTF8EncodingException.java
new file mode 100644
index 0000000000..3853a1f7a4
--- /dev/null
+++ 
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/exceptions/UTF8EncodingException.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.util.exceptions;
+
+public class UTF8EncodingException extends IllegalArgumentException {
+    public UTF8EncodingException(String s) {
+        super(s);
+    }
+}
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
 
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 3eb868712b..15638f9610 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -28,6 +28,7 @@ import java.io.UTFDataFormatException;
 import java.lang.ref.SoftReference;
 
 import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 
 /**
  * A helper package to operate the UTF8String in Hyracks.
@@ -35,6 +36,7 @@ import 
org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
  */
 public class UTF8StringUtil {
 
+    public static final String MALFORMED_BYTES = "Decoding error: malformed 
bytes";
     public static final String LOW_SURROGATE_WITHOUT_HIGH_SURROGATE =
             "Decoding error: got a low surrogate without a leading high 
surrogate";
     public static final String HIGH_SURROGATE_WITHOUT_LOW_SURROGATE =
@@ -101,7 +103,7 @@ public class UTF8StringUtil {
 
         if (Character.isLowSurrogate(c1)) {
             // In this case, the index s doesn't point to a correct position
-            throw new 
IllegalArgumentException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
+            throw new 
UTF8EncodingException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
         }
 
         if (Character.isHighSurrogate(c1)) {
@@ -112,7 +114,7 @@ public class UTF8StringUtil {
             if (Character.isLowSurrogate(c2)) {
                 return Character.toCodePoint(c1, c2);
             } else {
-                throw new 
IllegalArgumentException(HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
+                throw new 
UTF8EncodingException(HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
             }
         }
 
@@ -124,7 +126,7 @@ public class UTF8StringUtil {
         int size1 = charSize(b, s);
 
         if (Character.isLowSurrogate(c1)) {
-            throw new 
IllegalArgumentException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
+            throw new 
UTF8EncodingException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
         }
 
         if (Character.isHighSurrogate(c1)) {

Reply via email to