This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit b4a7d81b9925f94f1e06827e95cb54650ad6625f
Author: Hussain Towaileb <[email protected]>
AuthorDate: Fri Dec 1 07:54:00 2023 +0300

    [NO ISSUE]: Move StringUtils to hyracks-api module
    
    Change-Id: Iea1b7db9374332315dfaf56d49f24217f7c0834c
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17990
    Integration-Tests: Jenkins <[email protected]>
    Reviewed-by: Michael Blow <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../test/resources/runtimets/testsuite_sqlpp.xml   | 22 ++++++------
 .../asterix/om/exceptions/ExceptionUtil.java       | 14 ++++++--
 .../functions/AbstractBinaryStringEval.java        | 11 +++---
 .../functions/AbstractUnaryStringStringEval.java   | 11 +++---
 .../functions/StringContainsDescriptor.java        |  3 +-
 .../functions/StringLengthDescriptor.java          | 27 +++++++--------
 .../functions/StringPositionDescriptor.java        |  3 +-
 .../functions/StringPositionOffset1Descriptor.java |  3 +-
 .../functions/StringToCodePointDescriptor.java     | 29 ++++++++--------
 .../evaluators/functions/utils/StringTrimmer.java  |  8 +++--
 .../algebricks/algebricks-data/pom.xml             |  5 ---
 hyracks-fullstack/hyracks/hyracks-api/pom.xml      |  7 ++++
 .../apache/hyracks/api/exceptions/ErrorCode.java   |  1 +
 .../hyracks/api/exceptions/HyracksException.java   |  4 +++
 .../apache/hyracks/api/util/ErrorMessageUtil.java  |  7 ++++
 .../apache/hyracks/api/util/ExceptionUtils.java    | 12 +++++++
 .../hyracks/util/string/UTF8StringReader.java      |  0
 .../apache/hyracks/util/string/UTF8StringUtil.java | 22 ++++++------
 .../hyracks/util/string/UTF8StringWriter.java      |  0
 .../src/main/resources/errormsg/en.properties      |  2 ++
 .../api}/string/UTF8StringReaderWriterTest.java    |  4 ++-
 .../hyracks/api}/string/UTF8StringUtilTest.java    |  6 ++--
 .../data/std/primitive/UTF8StringPointable.java    | 39 ++++++++++++----------
 .../std/primitive/UTF8StringPointableTest.java     |  3 +-
 .../util/exceptions/UTF8EncodingException.java     | 25 --------------
 25 files changed, 152 insertions(+), 116 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 72578fa2ab..7bf9b307a3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -11331,17 +11331,17 @@
     <test-case FilePath="string" check-warnings="true">
       <compilation-unit name="invalid-unicode">
         <output-dir compare="Text">invalid-unicode</output-dir>
-        <expected-warn>ASX0060: Function 'string-length' failed to evaluate 
because: Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'string-to-codepoint' failed to 
evaluate because: Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'trim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'trim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'rtrim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'rtrim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'ltrim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'ltrim' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'reverse' failed to evaluate because: 
Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'position' failed to evaluate 
because: Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
-        <expected-warn>ASX0060: Function 'position1' failed to evaluate 
because: Decoding error: got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>Function 'string-length' failed to evaluate because: 
Decoding error - got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>Function 'string-to-codepoint' failed to evaluate 
because: Decoding error - got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>Function 'trim' failed to evaluate because: Decoding 
error - got a low surrogate without a leading high surrogate</expected-warn>
+        <expected-warn>Function 'trim' failed to evaluate because: Decoding 
error - got a low surrogate without a leading high surrogate</expected-warn>
+        <expected-warn>Function 'rtrim' failed to evaluate because: Decoding 
error - got a low surrogate without a leading high surrogate</expected-warn>
+        <expected-warn>Function 'rtrim' failed to evaluate because: Decoding 
error - got a low surrogate without a leading high surrogate</expected-warn>
+        <expected-warn>Function 'ltrim' failed to evaluate because: Decoding 
error - got a low surrogate without a leading high surrogate</expected-warn>
+        <expected-warn>Function 'ltrim' failed to evaluate because: Decoding 
error - got a low surrogate without a leading high surrogate</expected-warn>
+        <expected-warn>Function 'reverse' failed to evaluate because: Decoding 
error - got a low surrogate without a leading high surrogate</expected-warn>
+        <expected-warn>Function 'position' failed to evaluate because: 
Decoding error - got a low surrogate without a leading high 
surrogate</expected-warn>
+        <expected-warn>Function 'position1' failed to evaluate because: 
Decoding error - got a low surrogate without a leading high 
surrogate</expected-warn>
       </compilation-unit>
     </test-case>
   </test-group>
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/exceptions/ExceptionUtil.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/exceptions/ExceptionUtil.java
index 928a6b5195..daa9d838fd 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/exceptions/ExceptionUtil.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/exceptions/ExceptionUtil.java
@@ -19,6 +19,10 @@
 
 package org.apache.asterix.om.exceptions;
 
+import static 
org.apache.asterix.common.exceptions.ErrorCode.FUNCTION_EVALUATION_FAILED;
+import static 
org.apache.hyracks.api.exceptions.ErrorCode.INVALID_STRING_UNICODE;
+import static org.apache.hyracks.api.util.ExceptionUtils.isErrorCode;
+
 import java.util.function.Supplier;
 
 import org.apache.asterix.common.exceptions.ErrorCode;
@@ -26,6 +30,7 @@ import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.EnumDeserializer;
 import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
 import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
 import org.apache.hyracks.api.exceptions.SourceLocation;
 import org.apache.hyracks.api.exceptions.Warning;
@@ -143,11 +148,10 @@ public final class ExceptionUtil {
         warnInvalidValue(ctx, srcLoc, fid, argIdx, argValue, 
ErrorCode.NEGATIVE_VALUE);
     }
 
-    public static void warnStringFunctionFailed(IEvaluatorContext ctx, 
SourceLocation srcLoc, FunctionIdentifier fid,
+    public static void warnFunctionEvalFailed(IEvaluatorContext ctx, 
SourceLocation srcLoc, FunctionIdentifier fid,
             String errMsg) {
         if (ctx.getWarningCollector().shouldWarn()) {
-            ctx.getWarningCollector()
-                    .warn(Warning.of(srcLoc, 
ErrorCode.FUNCTION_EVALUATION_FAILED, fid.getName(), errMsg));
+            ctx.getWarningCollector().warn(Warning.of(srcLoc, 
FUNCTION_EVALUATION_FAILED, fid.getName(), errMsg));
         }
     }
 
@@ -159,4 +163,8 @@ public final class ExceptionUtil {
                     Warning.of(srcLoc, errorCode, fid.getName(), 
indexToPosition(argIdx), Double.toString(argValue)));
         }
     }
+
+    public static boolean isStringUnicodeError(HyracksDataException throwable) 
{
+        return isErrorCode(throwable, INVALID_STRING_UNICODE);
+    }
 }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
index 9de704ba87..204020e118 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
@@ -35,7 +35,6 @@ import 
org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.primitive.VoidPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 
 public abstract class AbstractBinaryStringEval implements IScalarEvaluator {
 
@@ -107,9 +106,13 @@ public abstract class AbstractBinaryStringEval implements 
IScalarEvaluator {
         // The actual processing.
         try {
             process(leftStringPointable, rightStringPointable, 
resultPointable);
-        } catch (UTF8EncodingException ex) {
-            PointableHelper.setNull(resultPointable);
-            ExceptionUtil.warnStringFunctionFailed(ctx, sourceLoc, funcID, 
ex.getMessage());
+        } catch (HyracksDataException ex) {
+            if (ExceptionUtil.isStringUnicodeError(ex)) {
+                PointableHelper.setNull(resultPointable);
+                ExceptionUtil.warnFunctionEvalFailed(ctx, sourceLoc, funcID, 
ex.getMessageNoCode());
+                return;
+            }
+            throw ex;
         } catch (IOException e) {
             throw HyracksDataException.create(e);
         }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java
index 7a60aae3f3..d92c9e9370 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java
@@ -37,7 +37,6 @@ import 
org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.data.std.util.GrowableArray;
 import org.apache.hyracks.data.std.util.UTF8StringBuilder;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 
 abstract class AbstractUnaryStringStringEval implements IScalarEvaluator {
 
@@ -85,9 +84,13 @@ abstract class AbstractUnaryStringStringEval implements 
IScalarEvaluator {
         try {
             process(stringPtr, resultPointable);
             writeResult(resultPointable);
-        } catch (UTF8EncodingException ex) {
-            PointableHelper.setNull(resultPointable);
-            ExceptionUtil.warnStringFunctionFailed(ctx, sourceLoc, funcID, 
ex.getMessage());
+        } catch (HyracksDataException ex) {
+            if (ExceptionUtil.isStringUnicodeError(ex)) {
+                PointableHelper.setNull(resultPointable);
+                ExceptionUtil.warnFunctionEvalFailed(ctx, sourceLoc, funcID, 
ex.getMessageNoCode());
+                return;
+            }
+            throw ex;
         } catch (IOException e) {
             throw HyracksDataException.create(e);
         }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringContainsDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringContainsDescriptor.java
index eeec70f7e0..f6b2a2df2d 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringContainsDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringContainsDescriptor.java
@@ -46,7 +46,8 @@ public class StringContainsDescriptor extends 
AbstractScalarFunctionDynamicDescr
                 return new AbstractBinaryStringBoolEval(ctx, args[0], args[1], 
BuiltinFunctions.STRING_CONTAINS,
                         sourceLoc) {
                     @Override
-                    protected boolean compute(UTF8StringPointable left, 
UTF8StringPointable right) {
+                    protected boolean compute(UTF8StringPointable left, 
UTF8StringPointable right)
+                            throws HyracksDataException {
                         return UTF8StringPointable.contains(left, right, 
false);
                     }
                 };
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLengthDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLengthDescriptor.java
index d9c9ecb66a..da23ae5fe3 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLengthDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLengthDescriptor.java
@@ -19,7 +19,6 @@
 package org.apache.asterix.runtime.evaluators.functions;
 
 import java.io.DataOutput;
-import java.io.IOException;
 
 import org.apache.asterix.common.annotations.MissingNullInOutFunction;
 import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
@@ -41,7 +40,6 @@ import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.primitive.VoidPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 import org.apache.hyracks.util.string.UTF8StringUtil;
 
 @MissingNullInOutFunction
@@ -57,13 +55,13 @@ public class StringLengthDescriptor extends 
AbstractScalarFunctionDynamicDescrip
             @Override
             public IScalarEvaluator createScalarEvaluator(final 
IEvaluatorContext ctx) throws HyracksDataException {
                 return new IScalarEvaluator() {
-                    private AMutableInt64 result = new AMutableInt64(0);
-                    private ArrayBackedValueStorage resultStorage = new 
ArrayBackedValueStorage();
-                    private DataOutput out = resultStorage.getDataOutput();
-                    private IPointable inputArg = new VoidPointable();
-                    private IScalarEvaluator eval = 
args[0].createScalarEvaluator(ctx);
+                    private final AMutableInt64 result = new AMutableInt64(0);
+                    private final ArrayBackedValueStorage resultStorage = new 
ArrayBackedValueStorage();
+                    private final DataOutput out = 
resultStorage.getDataOutput();
+                    private final IPointable inputArg = new VoidPointable();
+                    private final IScalarEvaluator eval = 
args[0].createScalarEvaluator(ctx);
                     @SuppressWarnings("unchecked")
-                    private ISerializerDeserializer<AInt64> int64Serde =
+                    private final ISerializerDeserializer<AInt64> int64Serde =
                             
SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT64);
 
                     @Override
@@ -90,11 +88,14 @@ public class StringLengthDescriptor extends 
AbstractScalarFunctionDynamicDescrip
                             result.setValue(len);
                             int64Serde.serialize(result, out);
                             resultPointable.set(resultStorage);
-                        } catch (UTF8EncodingException ex) {
-                            PointableHelper.setNull(resultPointable);
-                            ExceptionUtil.warnStringFunctionFailed(ctx, 
sourceLoc, getIdentifier(), ex.getMessage());
-                        } catch (IOException e1) {
-                            throw HyracksDataException.create(e1);
+                        } catch (HyracksDataException ex) {
+                            if (ExceptionUtil.isStringUnicodeError(ex)) {
+                                PointableHelper.setNull(resultPointable);
+                                ExceptionUtil.warnFunctionEvalFailed(ctx, 
sourceLoc, getIdentifier(),
+                                        ex.getMessageNoCode());
+                                return;
+                            }
+                            throw ex;
                         }
                     }
                 };
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java
index 6c060563d0..051083f422 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java
@@ -47,7 +47,8 @@ public class StringPositionDescriptor extends 
AbstractScalarFunctionDynamicDescr
                         StringPositionDescriptor.this.getIdentifier(), 
sourceLoc) {
 
                     @Override
-                    protected int compute(UTF8StringPointable left, 
UTF8StringPointable right) {
+                    protected int compute(UTF8StringPointable left, 
UTF8StringPointable right)
+                            throws HyracksDataException {
                         return UTF8StringPointable.findInCodePoint(left, 
right, false);
                     }
                 };
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionOffset1Descriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionOffset1Descriptor.java
index 93ada0f07d..668e03ccd8 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionOffset1Descriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionOffset1Descriptor.java
@@ -47,7 +47,8 @@ public class StringPositionOffset1Descriptor extends 
AbstractScalarFunctionDynam
                         StringPositionOffset1Descriptor.this.getIdentifier(), 
sourceLoc) {
 
                     @Override
-                    protected int compute(UTF8StringPointable left, 
UTF8StringPointable right) {
+                    protected int compute(UTF8StringPointable left, 
UTF8StringPointable right)
+                            throws HyracksDataException {
                         int pos = UTF8StringPointable.findInCodePoint(left, 
right, false);
                         return pos < 0 ? pos : pos + 1;
                     }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringToCodePointDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringToCodePointDescriptor.java
index d4f5368c9d..3320c2e9bb 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringToCodePointDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringToCodePointDescriptor.java
@@ -19,7 +19,6 @@
 package org.apache.asterix.runtime.evaluators.functions;
 
 import java.io.DataOutput;
-import java.io.IOException;
 
 import org.apache.asterix.builders.OrderedListBuilder;
 import org.apache.asterix.common.annotations.MissingNullInOutFunction;
@@ -43,7 +42,6 @@ import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.primitive.VoidPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 import org.apache.hyracks.util.string.UTF8StringUtil;
 
 @MissingNullInOutFunction
@@ -61,14 +59,14 @@ public class StringToCodePointDescriptor extends 
AbstractScalarFunctionDynamicDe
             @Override
             public IScalarEvaluator createScalarEvaluator(final 
IEvaluatorContext ctx) throws HyracksDataException {
                 return new IScalarEvaluator() {
-                    protected final ArrayBackedValueStorage resultStorage = 
new ArrayBackedValueStorage();
-                    protected final DataOutput out = 
resultStorage.getDataOutput();
-                    protected final IPointable argPtr = new VoidPointable();
-                    protected final IScalarEvaluator stringEval = 
args[0].createScalarEvaluator(ctx);
-                    protected final AOrderedListType intListType = new 
AOrderedListType(BuiltinType.AINT64, null);
+                    private final ArrayBackedValueStorage resultStorage = new 
ArrayBackedValueStorage();
+                    private final DataOutput out = 
resultStorage.getDataOutput();
+                    private final IPointable argPtr = new VoidPointable();
+                    private final IScalarEvaluator stringEval = 
args[0].createScalarEvaluator(ctx);
+                    private final AOrderedListType intListType = new 
AOrderedListType(BuiltinType.AINT64, null);
 
-                    private OrderedListBuilder listBuilder = new 
OrderedListBuilder();
-                    private ArrayBackedValueStorage inputVal = new 
ArrayBackedValueStorage();
+                    private final OrderedListBuilder listBuilder = new 
OrderedListBuilder();
+                    private final ArrayBackedValueStorage inputVal = new 
ArrayBackedValueStorage();
 
                     @SuppressWarnings("unchecked")
                     private final ISerializerDeserializer<AInt64> int64Serde =
@@ -110,11 +108,14 @@ public class StringToCodePointDescriptor extends 
AbstractScalarFunctionDynamicDe
                             }
                             listBuilder.write(out, true);
                             result.set(resultStorage);
-                        } catch (UTF8EncodingException ex) {
-                            PointableHelper.setNull(result);
-                            ExceptionUtil.warnStringFunctionFailed(ctx, 
sourceLoc, getIdentifier(), ex.getMessage());
-                        } catch (IOException e1) {
-                            throw HyracksDataException.create(e1);
+                        } catch (HyracksDataException ex) {
+                            if (ExceptionUtil.isStringUnicodeError(ex)) {
+                                PointableHelper.setNull(result);
+                                ExceptionUtil.warnFunctionEvalFailed(ctx, 
sourceLoc, getIdentifier(),
+                                        ex.getMessageNoCode());
+                                return;
+                            }
+                            throw ex;
                         }
                     }
                 };
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/StringTrimmer.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/StringTrimmer.java
index 8dc41f5fcc..0ddf459217 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/StringTrimmer.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/StringTrimmer.java
@@ -22,6 +22,7 @@ package org.apache.asterix.runtime.evaluators.functions.utils;
 import java.io.IOException;
 
 import org.apache.asterix.runtime.evaluators.functions.StringEvaluatorUtils;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.util.ByteArrayAccessibleOutputStream;
@@ -51,7 +52,7 @@ public class StringTrimmer {
      * @param resultArray
      *            , the byte array to hold results.
      */
-    public StringTrimmer(UTF8StringBuilder resultBuilder, GrowableArray 
resultArray) {
+    public StringTrimmer(UTF8StringBuilder resultBuilder, GrowableArray 
resultArray) throws HyracksDataException {
         this(resultBuilder, resultArray, null);
     }
 
@@ -63,7 +64,8 @@ public class StringTrimmer {
      * @param pattern
      *            , the string that is used to construct the charset for 
trimming.
      */
-    public StringTrimmer(UTF8StringBuilder resultBuilder, GrowableArray 
resultArray, UTF8StringPointable pattern) {
+    public StringTrimmer(UTF8StringBuilder resultBuilder, GrowableArray 
resultArray, UTF8StringPointable pattern)
+            throws HyracksDataException {
         this.resultBuilder = resultBuilder;
         this.resultArray = resultArray;
         if (pattern != null) {
@@ -78,7 +80,7 @@ public class StringTrimmer {
      * @param patternPtr
      *            , a pattern string.
      */
-    public void build(UTF8StringPointable patternPtr) {
+    public void build(UTF8StringPointable patternPtr) throws 
HyracksDataException {
         final boolean newPattern = (codePointSet.size() == 0) || 
lastPatternPtr.compareTo(patternPtr) != 0;
         if (newPattern) {
             StringEvaluatorUtils.copyResetUTF8Pointable(patternPtr, 
lastPatternStorage, lastPatternPtr);
diff --git a/hyracks-fullstack/algebricks/algebricks-data/pom.xml 
b/hyracks-fullstack/algebricks/algebricks-data/pom.xml
index 81edbda40d..8ca0765832 100644
--- a/hyracks-fullstack/algebricks/algebricks-data/pom.xml
+++ b/hyracks-fullstack/algebricks/algebricks-data/pom.xml
@@ -52,11 +52,6 @@
       <artifactId>hyracks-data-std</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.hyracks</groupId>
-      <artifactId>hyracks-util</artifactId>
-      <version>${project.version}</version>
-    </dependency>
     <dependency>
       <groupId>org.apache.hyracks</groupId>
       <artifactId>hyracks-api</artifactId>
diff --git a/hyracks-fullstack/hyracks/hyracks-api/pom.xml 
b/hyracks-fullstack/hyracks/hyracks-api/pom.xml
index 047f066b54..131731d9f0 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-api/pom.xml
@@ -64,6 +64,13 @@
       <artifactId>hyracks-util</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hyracks</groupId>
+      <artifactId>hyracks-util</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-core</artifactId>
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
index 729147340c..8170f072e9 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
@@ -154,6 +154,7 @@ public enum ErrorCode implements IError {
     PARSING_ERROR(124),
     INVALID_INVERTED_LIST_TYPE_TRAITS(125),
     ILLEGAL_STATE(126),
+    INVALID_STRING_UNICODE(127),
 
     // Compilation error codes.
     RULECOLLECTION_NOT_INSTANCE_OF_LIST(10000),
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/HyracksException.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/HyracksException.java
index 977e5d2959..12f10959de 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/HyracksException.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/HyracksException.java
@@ -158,6 +158,10 @@ public class HyracksException extends IOException 
implements IFormattedException
         return message;
     }
 
+    public String getMessageNoCode() {
+        return ErrorMessageUtil.getMessageNoCode(component, getMessage());
+    }
+
     @Override
     public String toString() {
         return getLocalizedMessage();
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/ErrorMessageUtil.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/ErrorMessageUtil.java
index 70b13fa739..cb0d579721 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/ErrorMessageUtil.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/ErrorMessageUtil.java
@@ -125,6 +125,13 @@ public class ErrorMessageUtil {
         }
     }
 
+    public static String getMessageNoCode(String component, String message) {
+        if (NONE.equals(component)) {
+            return message;
+        }
+        return message.substring(message.indexOf(":") + 2);
+    }
+
     public static String getCauseMessage(Throwable t) {
         if (t instanceof IFormattedException) {
             return t.getMessage();
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/ExceptionUtils.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/ExceptionUtils.java
index 7147542d81..e07cdd4b78 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/ExceptionUtils.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/ExceptionUtils.java
@@ -207,4 +207,16 @@ public class ExceptionUtils {
     public static String getMessageOrToString(Throwable e) {
         return e instanceof IFormattedException ? e.getMessage() : 
e.toString();
     }
+
+    /**
+     * Checks if the error code of the throwable is of the provided type
+     *
+     * @param throwable throwable with error code
+     * @param code error code to match against
+     *
+     * @return true if error code matches, false otherwise
+     */
+    public static boolean isErrorCode(HyracksDataException throwable, 
ErrorCode code) {
+        return throwable.getError().isPresent() && throwable.getError().get() 
== code;
+    }
 }
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
similarity index 100%
rename from 
hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
rename to 
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
similarity index 96%
rename from 
hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
rename to 
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 15638f9610..4fc503d8a3 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -17,6 +17,8 @@
 
 package org.apache.hyracks.util.string;
 
+import static 
org.apache.hyracks.api.exceptions.ErrorCode.INVALID_STRING_UNICODE;
+
 import java.io.ByteArrayOutputStream;
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -27,8 +29,8 @@ import java.io.OutputStream;
 import java.io.UTFDataFormatException;
 import java.lang.ref.SoftReference;
 
+import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
-import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 
 /**
  * A helper package to operate the UTF8String in Hyracks.
@@ -36,11 +38,11 @@ import 
org.apache.hyracks.util.exceptions.UTF8EncodingException;
  */
 public class UTF8StringUtil {
 
-    public static final String MALFORMED_BYTES = "Decoding error: malformed 
bytes";
+    public static final String MALFORMED_BYTES = "malformed bytes";
     public static final String LOW_SURROGATE_WITHOUT_HIGH_SURROGATE =
-            "Decoding error: got a low surrogate without a leading high 
surrogate";
+            "got a low surrogate without a leading high surrogate";
     public static final String HIGH_SURROGATE_WITHOUT_LOW_SURROGATE =
-            "Decoding error: got a high surrogate without a following low 
surrogate";
+            "got a high surrogate without a following low surrogate";
 
     private UTF8StringUtil() {
     }
@@ -98,12 +100,12 @@ public class UTF8StringUtil {
         }
     }
 
-    public static int codePointAt(byte[] b, int s) {
+    public static int codePointAt(byte[] b, int s) throws HyracksDataException 
{
         char c1 = charAt(b, s);
 
         if (Character.isLowSurrogate(c1)) {
             // In this case, the index s doesn't point to a correct position
-            throw new 
UTF8EncodingException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
+            throw HyracksDataException.create(INVALID_STRING_UNICODE, 
LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
         }
 
         if (Character.isHighSurrogate(c1)) {
@@ -114,19 +116,19 @@ public class UTF8StringUtil {
             if (Character.isLowSurrogate(c2)) {
                 return Character.toCodePoint(c1, c2);
             } else {
-                throw new 
UTF8EncodingException(HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
+                throw HyracksDataException.create(INVALID_STRING_UNICODE, 
HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
             }
         }
 
         return c1;
     }
 
-    public static int codePointSize(byte[] b, int s) {
+    public static int codePointSize(byte[] b, int s) throws 
HyracksDataException {
         char c1 = charAt(b, s);
         int size1 = charSize(b, s);
 
         if (Character.isLowSurrogate(c1)) {
-            throw new 
UTF8EncodingException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
+            throw HyracksDataException.create(INVALID_STRING_UNICODE, 
LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
         }
 
         if (Character.isHighSurrogate(c1)) {
@@ -206,7 +208,7 @@ public class UTF8StringUtil {
         return charCount;
     }
 
-    public static int getNumCodePoint(byte[] b, int s) {
+    public static int getNumCodePoint(byte[] b, int s) throws 
HyracksDataException {
         int len = getUTFLength(b, s);
         int pos = s + getNumBytesToStoreLength(len);
         int end = pos + len;
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
similarity index 100%
rename from 
hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
rename to 
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index 4d9c60bfcb..7db5d493a1 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -144,6 +144,8 @@
 124 = Parsing error %s: %s
 125 = Invalid inverted list type traits: %1$s
 126 = Illegal state. %1$s
+127 = Decoding error - %1$s
+
 
 10000 = The given rule collection %1$s is not an instance of the List class.
 10001 = Cannot compose partition constraint %1$s with %2$s
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/test/java/org/apache/hyracks/api/string/UTF8StringReaderWriterTest.java
similarity index 95%
rename from 
hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java
rename to 
hyracks-fullstack/hyracks/hyracks-api/src/test/java/org/apache/hyracks/api/string/UTF8StringReaderWriterTest.java
index 9010c9cc5b..abba958017 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/test/java/org/apache/hyracks/api/string/UTF8StringReaderWriterTest.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.hyracks.util.string;
+package org.apache.hyracks.api.string;
 
 import static org.apache.hyracks.util.string.UTF8StringSample.EMPTY_STRING;
 import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_127;
@@ -37,6 +37,8 @@ import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
 import org.junit.Test;
 
 public class UTF8StringReaderWriterTest {
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/test/java/org/apache/hyracks/api/string/UTF8StringUtilTest.java
similarity index 97%
rename from 
hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
rename to 
hyracks-fullstack/hyracks/hyracks-api/src/test/java/org/apache/hyracks/api/string/UTF8StringUtilTest.java
index c7468d2c44..6f3782b078 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/test/java/org/apache/hyracks/api/string/UTF8StringUtilTest.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.hyracks.util.string;
+package org.apache.hyracks.api.string;
 
 import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_127;
 import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_128;
@@ -46,6 +46,8 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.util.List;
 
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 import org.junit.Test;
 
 public class UTF8StringUtilTest {
@@ -178,7 +180,7 @@ public class UTF8StringUtilTest {
     }
 
     @Test
-    public void testGetNumCodePoint() {
+    public void testGetNumCodePoint() throws HyracksDataException {
         String str = 
"\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66";
         assertEquals(getNumCodePoint(writeStringToBytes(str), 0), 7);
 
diff --git 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 4acc8234b4..47d248877c 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -18,6 +18,7 @@
  */
 package org.apache.hyracks.data.std.primitive;
 
+import static 
org.apache.hyracks.api.exceptions.ErrorCode.INVALID_STRING_UNICODE;
 import static 
org.apache.hyracks.util.string.UTF8StringUtil.HIGH_SURROGATE_WITHOUT_LOW_SURROGATE;
 import static 
org.apache.hyracks.util.string.UTF8StringUtil.LOW_SURROGATE_WITHOUT_HIGH_SURROGATE;
 import static org.apache.hyracks.util.string.UTF8StringUtil.MALFORMED_BYTES;
@@ -36,7 +37,6 @@ import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.api.IPointableFactory;
 import org.apache.hyracks.data.std.util.GrowableArray;
 import org.apache.hyracks.data.std.util.UTF8StringBuilder;
-import org.apache.hyracks.util.exceptions.UTF8EncodingException;
 import org.apache.hyracks.util.string.UTF8StringUtil;
 
 import com.fasterxml.jackson.databind.JsonNode;
@@ -122,15 +122,15 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
         return UTF8StringUtil.charSize(bytes, start + offset);
     }
 
-    public int codePointAt(int offset) {
+    public int codePointAt(int offset) throws HyracksDataException {
         return UTF8StringUtil.codePointAt(bytes, start + offset);
     }
 
-    public int codePointSize(int offset) {
+    public int codePointSize(int offset) throws HyracksDataException {
         return UTF8StringUtil.codePointSize(bytes, start + offset);
     }
 
-    public void getCodePoints(IntCollection codePointSet) {
+    public void getCodePoints(IntCollection codePointSet) throws 
HyracksDataException {
         int byteIdx = 0;
         while (byteIdx < utf8Length) {
             codePointSet.add(codePointAt(metaLength + byteIdx));
@@ -138,7 +138,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
         }
 
         if (byteIdx != utf8Length) {
-            throw new UTF8EncodingException(MALFORMED_BYTES);
+            throw HyracksDataException.create(INVALID_STRING_UNICODE, 
MALFORMED_BYTES);
         }
     }
 
@@ -204,7 +204,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                 other.getStartOffset());
     }
 
-    public int find(UTF8StringPointable pattern, boolean ignoreCase) {
+    public int find(UTF8StringPointable pattern, boolean ignoreCase) throws 
HyracksDataException {
         return find(this, pattern, ignoreCase);
     }
 
@@ -229,7 +229,8 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
      *            to ignore case or not.
      * @return the byte offset of the first character of the matching string. 
Not including the MetaLength.
      */
-    public static int find(UTF8StringPointable src, UTF8StringPointable 
pattern, boolean ignoreCase) {
+    public static int find(UTF8StringPointable src, UTF8StringPointable 
pattern, boolean ignoreCase)
+            throws HyracksDataException {
         return find(src, pattern, ignoreCase, 0);
     }
 
@@ -242,7 +243,8 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
      *            to ignore case or not.
      * @return the offset in the unit of code point of the first character of 
the matching string. Not including the MetaLength.
      */
-    public static int findInCodePoint(UTF8StringPointable src, 
UTF8StringPointable pattern, boolean ignoreCase) {
+    public static int findInCodePoint(UTF8StringPointable src, 
UTF8StringPointable pattern, boolean ignoreCase)
+            throws HyracksDataException {
         return findInByteOrCodePoint(src, pattern, ignoreCase, 0, false);
     }
 
@@ -258,7 +260,8 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
     * @return the byte offset of the first character of the matching string 
after <code>startMatchPos</code>.
      *         Not including the MetaLength.
      */
-    public static int find(UTF8StringPointable src, UTF8StringPointable 
pattern, boolean ignoreCase, int startMatch) {
+    public static int find(UTF8StringPointable src, UTF8StringPointable 
pattern, boolean ignoreCase, int startMatch)
+            throws HyracksDataException {
         return findInByteOrCodePoint(src, pattern, ignoreCase, startMatch, 
true);
     }
 
@@ -274,13 +277,13 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
      * @return the offset in the unit of code point of the first character of 
the matching string. Not including the MetaLength.
      */
     public static int findInCodePoint(UTF8StringPointable src, 
UTF8StringPointable pattern, boolean ignoreCase,
-            int startMatch) {
+            int startMatch) throws HyracksDataException {
         return findInByteOrCodePoint(src, pattern, ignoreCase, startMatch, 
false);
     }
 
     // If resultInByte is true, then return the position in bytes, otherwise 
return the position in code points
     private static int findInByteOrCodePoint(UTF8StringPointable src, 
UTF8StringPointable pattern, boolean ignoreCase,
-            int startMatch, boolean resultInByte) {
+            int startMatch, boolean resultInByte) throws HyracksDataException {
         int startMatchPos = startMatch;
         final int srcUtfLen = src.getUTF8Length();
         final int pttnUtfLen = pattern.getUTF8Length();
@@ -319,7 +322,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                     return startMatchPos;
                 } else {
                     if (prevHighSurrogate) {
-                        throw new 
UTF8EncodingException(HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
+                        throw 
HyracksDataException.create(INVALID_STRING_UNICODE, 
HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
                     }
                     return codePointCount;
                 }
@@ -335,7 +338,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                         codePointCount++;
                         prevHighSurrogate = false;
                     } else {
-                        throw new 
UTF8EncodingException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
+                        throw 
HyracksDataException.create(INVALID_STRING_UNICODE, 
LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
                     }
                 } else {
                     codePointCount++;
@@ -347,11 +350,12 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
         return -1;
     }
 
-    public boolean contains(UTF8StringPointable pattern, boolean ignoreCase) {
+    public boolean contains(UTF8StringPointable pattern, boolean ignoreCase) 
throws HyracksDataException {
         return contains(this, pattern, ignoreCase);
     }
 
-    public static boolean contains(UTF8StringPointable src, 
UTF8StringPointable pattern, boolean ignoreCase) {
+    public static boolean contains(UTF8StringPointable src, 
UTF8StringPointable pattern, boolean ignoreCase)
+            throws HyracksDataException {
         return find(src, pattern, ignoreCase) >= 0;
     }
 
@@ -742,7 +746,8 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                         if (UTF8StringUtil.isCharStart(srcPtr.bytes, 
cursorIndex)) {
                             ch = UTF8StringUtil.charAt(srcPtr.bytes, 
cursorIndex);
                             if (!Character.isHighSurrogate(ch)) {
-                                throw new 
UTF8EncodingException(LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
+                                throw 
HyracksDataException.create(INVALID_STRING_UNICODE,
+                                        LOW_SURROGATE_WITHOUT_HIGH_SURROGATE);
                             }
 
                             charSize += UTF8StringUtil.charSize(srcPtr.bytes, 
cursorIndex);
@@ -750,7 +755,7 @@ public final class UTF8StringPointable extends 
AbstractPointable implements IHas
                         }
                     }
                 } else if (Character.isHighSurrogate(ch)) {
-                    throw new 
UTF8EncodingException(HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
+                    throw HyracksDataException.create(INVALID_STRING_UNICODE, 
HIGH_SURROGATE_WITHOUT_LOW_SURROGATE);
                 }
 
                 builder.appendUtf8StringPointable(srcPtr, cursorIndex, 
charSize);
diff --git 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
index f088c7eeae..45a5ba32ce 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@ -30,6 +30,7 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.util.Arrays;
 
+import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.util.GrowableArray;
 import org.apache.hyracks.data.std.util.UTF8StringBuilder;
 import org.apache.hyracks.util.string.UTF8StringSample;
@@ -68,7 +69,7 @@ public class UTF8StringPointableTest {
     }
 
     @Test
-    public void testFindInCodePoint() {
+    public void testFindInCodePoint() throws HyracksDataException {
         UTF8StringPointable strp = 
generateUTF8Pointable(STRING_EMOJI_FAMILY_OF_4 + EMOJI_BASKETBALL);
         UTF8StringPointable pattern = generateUTF8Pointable(EMOJI_BASKETBALL);
 
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/exceptions/UTF8EncodingException.java
 
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/exceptions/UTF8EncodingException.java
deleted file mode 100644
index 3853a1f7a4..0000000000
--- 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/exceptions/UTF8EncodingException.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hyracks.util.exceptions;
-
-public class UTF8EncodingException extends IllegalArgumentException {
-    public UTF8EncodingException(String s) {
-        super(s);
-    }
-}

Reply via email to