Repository: asterixdb Updated Branches: refs/heads/master 70daead8b -> c8ba268f7
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 6bb1886..7d064a5 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -6201,33 +6201,18 @@ </compilation-unit> </test-case> <test-case FilePath="string"> - <compilation-unit name="regexp_replace"> - <output-dir compare="Text">replace22</output-dir> - </compilation-unit> - </test-case> - <test-case FilePath="string"> - <compilation-unit name="replace1"> - <output-dir compare="Text">replace1</output-dir> + <compilation-unit name="replace"> + <output-dir compare="Text">replace</output-dir> </compilation-unit> </test-case> <test-case FilePath="string"> - <compilation-unit name="replace2"> - <output-dir compare="Text">replace2</output-dir> + <compilation-unit name="replace_with_limit"> + <output-dir compare="Text">replace_with_limit</output-dir> </compilation-unit> </test-case> <test-case FilePath="string"> - <compilation-unit name="replace21"> - <output-dir compare="Text">replace21</output-dir> - </compilation-unit> - </test-case> - <test-case FilePath="string"> - <compilation-unit name="replace22"> - <output-dir compare="Text">replace22</output-dir> - </compilation-unit> - </test-case> - <test-case FilePath="string"> - <compilation-unit name="replace3"> - <output-dir compare="Text">replace3</output-dir> + <compilation-unit name="regexp_replace"> + <output-dir compare="Text">regexp_replace</output-dir> </compilation-unit> </test-case> <test-case FilePath="string"> http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md 
---------------------------------------------------------------------- diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md index 1f56c4c..0e548a7 100644 --- a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md +++ b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md @@ -332,7 +332,7 @@ * Checks whether the string `string` matches the given regular expression pattern `string_pattern` (a Java regular expression pattern), - and replace the matched pattern `string_pattern` with the new pattern `string_replacement`. + and replaces the matched pattern `string_pattern` with the new pattern `string_replacement`. * Arguments: * `string` : a `string` that might contain the pattern, * `string_pattern` : a pattern `string` to be matched, @@ -381,6 +381,38 @@ "testtesttest" +### replace ### + * Syntax: + + replace(string, search_string, replacement_string[, limit]) + + * Finds occurrences of the given substring `search_string` in the input string `string` + and replaces them with the new substring `replacement_string`. + * Arguments: + * `string` : an input `string`, + * `search_string` : a `string` substring to be searched for, + * `replacement_string` : a `string` to be used as the replacement, + * `limit` : (Optional) an `integer` - maximum number of occurrences to be replaced. + If not specified then all occurrences will be replaced + * Return Value: + * Returns a `string` that is obtained after the replacements, + * `missing` if any argument is a `missing` value, + * any other non-string input value or non-integer `limit` will cause a type error, + * `null` if any argument is a `null` value but no argument is a `missing` value. 
+ + * Example: + + { + "v1": replace(" like x-phone the voicemail_service is awesome", " like x-phone", "like product-a"), + "v2": replace("x-phone and x-phone", "x-phone", "product-a", 1) + }; + + * The expected result is: + + { + "v1": "like product-a the voicemail_service is awesome", + "v2": "product-a and x-phone" + } ### rtrim ### * Syntax: http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java index 0646224..2aaf08f 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java @@ -39,7 +39,6 @@ public class CommonFunctionMapUtil { FUNCTION_NAME_MAP.put("upper", "uppercase"); // upper, internal: uppercase FUNCTION_NAME_MAP.put("title", "initcap"); // title, internal: initcap FUNCTION_NAME_MAP.put("regexp_contains", "matches"); // regexp_contains, internal: matches - FUNCTION_NAME_MAP.put("regexp_replace", "replace"); //regexp_replace, internal: replace FUNCTION_NAME_MAP.put("power", "caret"); //pow, internal: caret FUNCTION_NAME_MAP.put("int", "integer"); // int, internal: integer http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java index 3e3cf43..92617ee 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java @@ -287,6 +287,10 @@ public class BuiltinFunctions { new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "regexp-position", 2); public static final FunctionIdentifier STRING_REGEXP_POSITION_WITH_FLAG = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "regexp-position", 3); + public static final FunctionIdentifier STRING_REGEXP_REPLACE = + new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "regexp-replace", 3); + public static final FunctionIdentifier STRING_REGEXP_REPLACE_WITH_FLAG = + new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "regexp-replace", 4); public static final FunctionIdentifier STRING_LOWERCASE = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "lowercase", 1); public static final FunctionIdentifier STRING_UPPERCASE = @@ -309,7 +313,7 @@ public class BuiltinFunctions { new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "position", 2); public static final FunctionIdentifier STRING_REPLACE = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "replace", 3); - public static final FunctionIdentifier STRING_REPLACE_WITH_FLAG = + public static final FunctionIdentifier STRING_REPLACE_WITH_LIMIT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "replace", 4); public static final FunctionIdentifier STRING_LENGTH = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "string-length", 1); @@ -1043,8 +1047,10 @@ public class BuiltinFunctions { addFunction(STRING_REGEXP_LIKE_WITH_FLAG, StringBooleanTypeComputer.INSTANCE, true); addFunction(STRING_REGEXP_POSITION, StringInt32TypeComputer.INSTANCE, true); addFunction(STRING_REGEXP_POSITION_WITH_FLAG, StringInt32TypeComputer.INSTANCE, true); + addFunction(STRING_REGEXP_REPLACE, 
StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_REGEXP_REPLACE_WITH_FLAG, StringStringTypeComputer.INSTANCE, true); addFunction(STRING_REPLACE, StringStringTypeComputer.INSTANCE, true); - addFunction(STRING_REPLACE_WITH_FLAG, StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_REPLACE_WITH_LIMIT, StringIntToStringTypeComputer.INSTANCE_TRIPLE_STRING, true); addFunction(SUBSTRING_BEFORE, StringStringTypeComputer.INSTANCE, true); addFunction(SUBSTRING_AFTER, StringStringTypeComputer.INSTANCE, true); addPrivateFunction(STRING_EQUAL, StringBooleanTypeComputer.INSTANCE, true); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java index 5b3cf54..e29b6ec 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java @@ -27,15 +27,24 @@ import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; public class StringIntToStringTypeComputer extends AbstractResultTypeComputer { - public static final StringIntToStringTypeComputer INSTANCE = new StringIntToStringTypeComputer(); + public static final StringIntToStringTypeComputer INSTANCE = new StringIntToStringTypeComputer(1); + + public static final StringIntToStringTypeComputer INSTANCE_TRIPLE_STRING = new StringIntToStringTypeComputer(3); + + private final int stringArgCount; + + public StringIntToStringTypeComputer(int 
stringArgCount) { + this.stringArgCount = stringArgCount; + } @Override public void checkArgType(String funcName, int argIndex, IAType type) throws AlgebricksException { ATypeTag tag = type.getTypeTag(); - if (argIndex == 0 && tag != ATypeTag.STRING) { - throw new TypeMismatchException(funcName, argIndex, tag, ATypeTag.STRING); - } - if (argIndex == 1) { + if (argIndex < stringArgCount) { + if (tag != ATypeTag.STRING) { + throw new TypeMismatchException(funcName, argIndex, tag, ATypeTag.STRING); + } + } else { switch (tag) { case TINYINT: case SMALLINT: http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractStringStringStringIntEval.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractStringStringStringIntEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractStringStringStringIntEval.java new file mode 100644 index 0000000..89bba67 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractStringStringStringIntEval.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.DataOutput; + +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.hierachy.ATypeHierarchy; +import org.apache.asterix.runtime.exceptions.TypeMismatchException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.primitive.VoidPointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; + +public abstract class AbstractStringStringStringIntEval implements IScalarEvaluator { + // Argument evaluators. + private IScalarEvaluator eval0; + private IScalarEvaluator eval1; + private IScalarEvaluator eval2; + private IScalarEvaluator eval3; + + // Argument pointables. 
+ final IPointable argPtrFirst = new VoidPointable(); + final IPointable argPtrSecond = new VoidPointable(); + final IPointable argPtrThird = new VoidPointable(); + final IPointable argPtrFourth = new VoidPointable(); + private final UTF8StringPointable strPtr1st = new UTF8StringPointable(); + private final UTF8StringPointable strPtr2nd = new UTF8StringPointable(); + private final UTF8StringPointable strPtr3rd = new UTF8StringPointable(); + + // For outputting results. + ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); + DataOutput dout = resultStorage.getDataOutput(); + + // Function ID, for error reporting. + protected final FunctionIdentifier funcID; + + AbstractStringStringStringIntEval(IHyracksTaskContext context, IScalarEvaluatorFactory eval0, + IScalarEvaluatorFactory eval1, IScalarEvaluatorFactory eval2, IScalarEvaluatorFactory eval3, + FunctionIdentifier funcID) throws HyracksDataException { + this.eval0 = eval0.createScalarEvaluator(context); + this.eval1 = eval1.createScalarEvaluator(context); + this.eval2 = eval2.createScalarEvaluator(context); + this.eval3 = eval3.createScalarEvaluator(context); + this.funcID = funcID; + } + + @SuppressWarnings("unchecked") + @Override + public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException { + // Gets the first argument. + eval0.evaluate(tuple, argPtrFirst); + byte[] bytes0 = argPtrFirst.getByteArray(); + int start0 = argPtrFirst.getStartOffset(); + int len0 = argPtrFirst.getLength(); + + // Gets the second argument. + eval1.evaluate(tuple, argPtrSecond); + byte[] bytes1 = argPtrSecond.getByteArray(); + int start1 = argPtrSecond.getStartOffset(); + int len1 = argPtrSecond.getLength(); + + // Gets the third argument. + eval2.evaluate(tuple, argPtrThird); + byte[] bytes2 = argPtrThird.getByteArray(); + int start2 = argPtrThird.getStartOffset(); + int len2 = argPtrThird.getLength(); + + // Gets the fourth argument. 
+ eval3.evaluate(tuple, argPtrFourth); + byte[] bytes3 = argPtrFourth.getByteArray(); + int start3 = argPtrFourth.getStartOffset(); + + // Type check. + if (bytes0[start0] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { + throw new TypeMismatchException(funcID, 0, bytes0[start0], ATypeTag.SERIALIZED_STRING_TYPE_TAG); + } + if (bytes1[start1] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { + throw new TypeMismatchException(funcID, 1, bytes1[start1], ATypeTag.SERIALIZED_STRING_TYPE_TAG); + } + if (bytes2[start2] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { + throw new TypeMismatchException(funcID, 2, bytes2[start2], ATypeTag.SERIALIZED_STRING_TYPE_TAG); + } + if (bytes3[start3] != ATypeTag.SERIALIZED_INT8_TYPE_TAG && bytes3[start3] != ATypeTag.SERIALIZED_INT16_TYPE_TAG + && bytes3[start3] != ATypeTag.SERIALIZED_INT32_TYPE_TAG + && bytes3[start3] != ATypeTag.SERIALIZED_INT64_TYPE_TAG) { + throw new TypeMismatchException(funcID, 3, bytes3[start3], ATypeTag.SERIALIZED_INT8_TYPE_TAG, + ATypeTag.SERIALIZED_INT16_TYPE_TAG, ATypeTag.SERIALIZED_INT32_TYPE_TAG, + ATypeTag.SERIALIZED_INT64_TYPE_TAG); + } + + // Sets argument UTF8Pointables. + strPtr1st.set(bytes0, start0 + 1, len0 - 1); + strPtr2nd.set(bytes1, start1 + 1, len1 - 1); + strPtr3rd.set(bytes2, start2 + 1, len2 - 1); + + long int4th = ATypeHierarchy.getLongValue(funcID.getName(), 3, bytes3, start3); + + // Resets the output storage. + resultStorage.reset(); + // The actual processing. + process(strPtr1st, strPtr2nd, strPtr3rd, int4th, result); + } + + /** + * The actual processing of a string function. + * + * @param first + * , the first argument. + * @param second + * , the second argument. + * @param third + * , the third argument. + * @param fourth + * , the fourth argument. + * @param resultPointable + * , the result. 
+ * @throws HyracksDataException + */ + protected abstract void process(UTF8StringPointable first, UTF8StringPointable second, UTF8StringPointable third, + long fourth, IPointable resultPointable) throws HyracksDataException; +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringBoolEval.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringBoolEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringBoolEval.java index 830f2ff..b2da86d 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringBoolEval.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringBoolEval.java @@ -18,8 +18,6 @@ */ package org.apache.asterix.runtime.evaluators.functions; -import java.io.IOException; - import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider; import org.apache.asterix.om.base.ABoolean; import org.apache.asterix.om.types.BuiltinType; @@ -46,7 +44,7 @@ public abstract class AbstractTripleStringBoolEval extends AbstractTripleStringE @SuppressWarnings("unchecked") @Override protected void process(UTF8StringPointable first, UTF8StringPointable second, UTF8StringPointable thrid, - IPointable result) throws IOException { + IPointable result) throws HyracksDataException { ABoolean res = compute(first, second, thrid) ? ABoolean.TRUE : ABoolean.FALSE; boolSerde.serialize(res, dout); result.set(resultStorage); @@ -62,9 +60,8 @@ public abstract class AbstractTripleStringBoolEval extends AbstractTripleStringE * @param third * , the second input argument. * @return a boolean value. 
- * @throws IOException + * @throws HyracksDataException */ protected abstract boolean compute(UTF8StringPointable first, UTF8StringPointable second, UTF8StringPointable third) - throws IOException; - + throws HyracksDataException; } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringEval.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringEval.java index d4a9329..26563af 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringEval.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringEval.java @@ -20,7 +20,6 @@ package org.apache.asterix.runtime.evaluators.functions; import java.io.DataOutput; -import java.io.IOException; import org.apache.asterix.runtime.exceptions.TypeMismatchException; import org.apache.asterix.om.types.ATypeTag; @@ -43,9 +42,9 @@ abstract class AbstractTripleStringEval implements IScalarEvaluator { private IScalarEvaluator eval2; // Argument pointables. 
- private IPointable argPtrFirst = new VoidPointable(); - private IPointable argPtrSecond = new VoidPointable(); - private IPointable argPtrThird = new VoidPointable(); + final IPointable argPtrFirst = new VoidPointable(); + final IPointable argPtrSecond = new VoidPointable(); + final IPointable argPtrThird = new VoidPointable(); private final UTF8StringPointable strPtr1st = new UTF8StringPointable(); private final UTF8StringPointable strPtr2nd = new UTF8StringPointable(); private final UTF8StringPointable strPtr3rd = new UTF8StringPointable(); @@ -55,7 +54,7 @@ abstract class AbstractTripleStringEval implements IScalarEvaluator { DataOutput dout = resultStorage.getDataOutput(); // Function ID, for error reporting. - private final FunctionIdentifier funcID; + protected final FunctionIdentifier funcID; AbstractTripleStringEval(IHyracksTaskContext context, IScalarEvaluatorFactory eval0, IScalarEvaluatorFactory eval1, IScalarEvaluatorFactory eval2, FunctionIdentifier funcID) throws HyracksDataException { @@ -87,7 +86,6 @@ abstract class AbstractTripleStringEval implements IScalarEvaluator { int len2 = argPtrThird.getLength(); // Type check. - // Type check. if (bytes0[start0] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { throw new TypeMismatchException(funcID, 0, bytes0[start0], ATypeTag.SERIALIZED_STRING_TYPE_TAG); } @@ -106,11 +104,7 @@ abstract class AbstractTripleStringEval implements IScalarEvaluator { // Resets the output storage. resultStorage.reset(); // The actual processing. - try { - process(strPtr1st, strPtr2nd, strPtr3rd, result); - } catch (IOException e) { - throw new HyracksDataException(e); - } + process(strPtr1st, strPtr2nd, strPtr3rd, result); } /** @@ -121,12 +115,11 @@ abstract class AbstractTripleStringEval implements IScalarEvaluator { * @param second * , the second argument. * @param third - * , the second argument. + * , the third argument. * @param resultPointable * , the result. 
- * @throws IOException + * @throws HyracksDataException */ protected abstract void process(UTF8StringPointable first, UTF8StringPointable second, UTF8StringPointable third, - IPointable resultPointable) throws IOException; - + IPointable resultPointable) throws HyracksDataException; } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringIntEval.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringIntEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringIntEval.java index 7f0076b..1ee6c8b 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringIntEval.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringIntEval.java @@ -19,8 +19,6 @@ package org.apache.asterix.runtime.evaluators.functions; -import java.io.IOException; - import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider; import org.apache.asterix.om.base.AMutableInt32; import org.apache.asterix.om.types.BuiltinType; @@ -48,7 +46,7 @@ public abstract class AbstractTripleStringIntEval extends AbstractTripleStringEv @SuppressWarnings("unchecked") @Override protected void process(UTF8StringPointable first, UTF8StringPointable second, UTF8StringPointable thrid, - IPointable result) throws IOException { + IPointable result) throws HyracksDataException { resultValue.setValue(compute(first, second, thrid)); intSerde.serialize(resultValue, dout); result.set(resultStorage); @@ -64,8 +62,8 @@ public abstract class AbstractTripleStringIntEval extends AbstractTripleStringEv * @param third * , the second input argument. * @return an integer value. 
- * @throws IOException + * @throws HyracksDataException */ protected abstract int compute(UTF8StringPointable first, UTF8StringPointable second, UTF8StringPointable third) - throws IOException; + throws HyracksDataException; } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringStringEval.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringStringEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringStringEval.java index 48ef5f7..3c73b36 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringStringEval.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractTripleStringStringEval.java @@ -18,8 +18,6 @@ */ package org.apache.asterix.runtime.evaluators.functions; -import java.io.IOException; - import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider; import org.apache.asterix.om.base.AMutableString; import org.apache.asterix.om.types.BuiltinType; @@ -47,7 +45,7 @@ public abstract class AbstractTripleStringStringEval extends AbstractTripleStrin @SuppressWarnings("unchecked") @Override protected void process(UTF8StringPointable first, UTF8StringPointable second, UTF8StringPointable thrid, - IPointable result) throws IOException { + IPointable result) throws HyracksDataException { resultValue.setValue(compute(first, second, thrid)); stringSerde.serialize(resultValue, dout); result.set(resultStorage); @@ -63,8 +61,8 @@ public abstract class AbstractTripleStringStringEval extends AbstractTripleStrin * @param third * , the second input argument. * @return a string value. 
- * @throws IOException + * @throws HyracksDataException */ protected abstract String compute(UTF8StringPointable first, UTF8StringPointable second, UTF8StringPointable third) - throws IOException; + throws HyracksDataException; } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpContainsWithFlagDescriptor.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpContainsWithFlagDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpContainsWithFlagDescriptor.java index 44013e7..a5a79d9 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpContainsWithFlagDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpContainsWithFlagDescriptor.java @@ -22,8 +22,6 @@ */ package org.apache.asterix.runtime.evaluators.functions; -import java.io.IOException; - import org.apache.asterix.om.functions.BuiltinFunctions; import org.apache.asterix.om.functions.IFunctionDescriptor; import org.apache.asterix.om.functions.IFunctionDescriptorFactory; @@ -59,7 +57,7 @@ public class StringRegExpContainsWithFlagDescriptor extends AbstractScalarFuncti @Override protected boolean compute(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, - UTF8StringPointable flagPtr) throws IOException { + UTF8StringPointable flagPtr) { matcher.build(srcPtr, patternPtr, flagPtr); return matcher.find(); } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpLikeWithFlagDescriptor.java ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpLikeWithFlagDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpLikeWithFlagDescriptor.java index 83971a0..635b4a6 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpLikeWithFlagDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpLikeWithFlagDescriptor.java @@ -19,8 +19,6 @@ package org.apache.asterix.runtime.evaluators.functions; -import java.io.IOException; - import org.apache.asterix.om.functions.BuiltinFunctions; import org.apache.asterix.om.functions.IFunctionDescriptor; import org.apache.asterix.om.functions.IFunctionDescriptorFactory; @@ -56,7 +54,7 @@ public class StringRegExpLikeWithFlagDescriptor extends AbstractScalarFunctionDy @Override protected boolean compute(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, - UTF8StringPointable flagPtr) throws IOException { + UTF8StringPointable flagPtr) { matcher.build(srcPtr, patternPtr, flagPtr); return matcher.matches(); } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java index 16d428f..0d910be 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java +++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java @@ -19,8 +19,6 @@ package org.apache.asterix.runtime.evaluators.functions; -import java.io.IOException; - import org.apache.asterix.om.functions.BuiltinFunctions; import org.apache.asterix.om.functions.IFunctionDescriptor; import org.apache.asterix.om.functions.IFunctionDescriptorFactory; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceDescriptor.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceDescriptor.java index fb2e2da..a1ea169 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceDescriptor.java @@ -55,7 +55,7 @@ public class StringRegExpReplaceDescriptor extends AbstractScalarFunctionDynamic @Override protected String compute(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, - UTF8StringPointable replacePtr) throws IOException { + UTF8StringPointable replacePtr) { matcher.build(srcPtr, patternPtr); return matcher.replace(replacePtr); } @@ -66,6 +66,6 @@ public class StringRegExpReplaceDescriptor extends AbstractScalarFunctionDynamic @Override public FunctionIdentifier getIdentifier() { - return BuiltinFunctions.STRING_REPLACE; + return BuiltinFunctions.STRING_REGEXP_REPLACE; } } 
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceWithFlagsDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceWithFlagsDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceWithFlagsDescriptor.java
index 71383fc..f938c24 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceWithFlagsDescriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpReplaceWithFlagsDescriptor.java
@@ -66,6 +66,6 @@ public class StringRegExpReplaceWithFlagsDescriptor extends AbstractScalarFuncti
 
     @Override
     public FunctionIdentifier getIdentifier() {
-        return BuiltinFunctions.STRING_REPLACE_WITH_FLAG;
+        return BuiltinFunctions.STRING_REGEXP_REPLACE_WITH_FLAG;
     }
 }

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringReplaceDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringReplaceDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringReplaceDescriptor.java
new file mode 100644
index 0000000..098d98f
--- /dev/null
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringReplaceDescriptor.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptor;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.utils.StringReplacer;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+/**
+ * Descriptor for the three-argument string replace function ({@link BuiltinFunctions#STRING_REPLACE}).
+ * The evaluator delegates to {@link StringReplacer} with a limit of {@link Integer#MAX_VALUE}, i.e. the
+ * number of replacements is effectively unbounded.
+ */
+public class StringReplaceDescriptor extends AbstractScalarFunctionDynamicDescriptor {
+    private static final long serialVersionUID = 1L;
+    public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
+        @Override
+        public IFunctionDescriptor createFunctionDescriptor() {
+            return new StringReplaceDescriptor();
+        }
+    };
+
+    @Override
+    public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
+        return new IScalarEvaluatorFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
+                return new AbstractTripleStringEval(ctx, args[0], args[1], args[2], getIdentifier()) {
+
+                    final StringReplacer replacer = new StringReplacer();
+
+                    @Override
+                    protected void process(UTF8StringPointable first, UTF8StringPointable second,
+                            UTF8StringPointable third, IPointable resultPointable) throws HyracksDataException {
+                        // false means nothing was replaced; the result then just points at argPtrFirst
+                        // (presumably the evaluated first argument held by the base evaluator).
+                        if (replacer.findAndReplace(first, second, third, Integer.MAX_VALUE)) {
+                            replacer.assignResult(resultPointable);
+                        } else {
+                            resultPointable.set(argPtrFirst);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    @Override
+    public FunctionIdentifier getIdentifier() {
+        return BuiltinFunctions.STRING_REPLACE;
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringReplaceWithLimitDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringReplaceWithLimitDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringReplaceWithLimitDescriptor.java
new file mode 100644
index 0000000..fcd8218
--- /dev/null
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringReplaceWithLimitDescriptor.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptor;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.utils.StringReplacer;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+/**
+ * Descriptor for the four-argument string replace function ({@link BuiltinFunctions#STRING_REPLACE_WITH_LIMIT}).
+ * The fourth argument bounds the number of replacements; the evaluator delegates the work to
+ * {@link StringReplacer}.
+ */
+public class StringReplaceWithLimitDescriptor extends AbstractScalarFunctionDynamicDescriptor {
+    private static final long serialVersionUID = 1L;
+    public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
+        @Override
+        public IFunctionDescriptor createFunctionDescriptor() {
+            return new StringReplaceWithLimitDescriptor();
+        }
+    };
+
+    @Override
+    public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
+        return new IScalarEvaluatorFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
+                return new AbstractStringStringStringIntEval(ctx, args[0], args[1], args[2], args[3], getIdentifier()) {
+
+                    final StringReplacer replacer = new StringReplacer();
+
+                    @Override
+                    protected void process(UTF8StringPointable first, UTF8StringPointable second,
+                            UTF8StringPointable third, long fourth, IPointable resultPointable)
+                            throws HyracksDataException {
+                        // Clamp rather than cast: a plain (int) cast keeps only the low 32 bits, so a limit such
+                        // as 2^32 would silently become 0 (no replacement) and -(2^32 - 1) would become 1.
+                        final int limit = (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, fourth));
+                        // false means nothing was replaced; the result then just points at argPtrFirst
+                        // (presumably the evaluated first argument held by the base evaluator).
+                        if (replacer.findAndReplace(first, second, third, limit)) {
+                            replacer.assignResult(resultPointable);
+                        } else {
+                            resultPointable.set(argPtrFirst);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    @Override
+    public FunctionIdentifier getIdentifier() {
+        return BuiltinFunctions.STRING_REPLACE_WITH_LIMIT;
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/StringReplacer.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/StringReplacer.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/StringReplacer.java
new file mode 100644
index 0000000..4e7e2f7
--- /dev/null
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/StringReplacer.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions.utils;
+
+import java.io.IOException;
+
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+
+/**
+ * A wrapper for string replace methods.
+ */
+public final class StringReplacer {
+    // For outputting the result.
+    private final UTF8StringBuilder resultBuilder = new UTF8StringBuilder();
+    private final GrowableArray resultArray = new GrowableArray();
+    // Length of resultArray right after the string type tag was written; used to rewind it before each call.
+    private final int resultArrayInitLength;
+
+    /**
+     * Creates a replacer whose result buffer is pre-filled with the serialized string type tag.
+     *
+     * @throws HyracksDataException if writing the type tag to the buffer fails
+     */
+    public StringReplacer() throws HyracksDataException {
+        try {
+            resultArray.getDataOutput().writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+            resultArrayInitLength = resultArray.getLength();
+        } catch (IOException e) {
+            throw HyracksDataException.create(e);
+        }
+    }
+
+    /**
+     * Replaces up to {@code limit} occurrences of {@code search} in {@code input} with {@code replace}, writing the
+     * outcome into the internal result buffer.
+     *
+     * @return {@code true} if a result was produced (fetch it with {@link #assignResult(IPointable)});
+     *         {@code false} if nothing was replaced and the buffer holds no usable result
+     * @throws HyracksDataException if the underlying string builder fails with an {@link IOException}
+     */
+    public boolean findAndReplace(UTF8StringPointable input, UTF8StringPointable search, UTF8StringPointable replace,
+            int limit) throws HyracksDataException {
+        try {
+            resultArray.setSize(resultArrayInitLength);
+            return input.findAndReplace(search, replace, limit, resultBuilder, resultArray);
+        } catch (IOException e) {
+            throw HyracksDataException.create(e);
+        }
+    }
+
+    /**
+     * Points {@code resultPointable} at the bytes produced by the last successful
+     * {@link #findAndReplace(UTF8StringPointable, UTF8StringPointable, UTF8StringPointable, int)} call.
+     * NOTE(review): this hands out the internal byte array, so the result is presumably only valid until the
+     * next findAndReplace call -- confirm against IPointable.set.
+     */
+    public void assignResult(IPointable resultPointable) {
+        resultPointable.set(resultArray.getByteArray(), 0, resultArray.getLength());
+    }
+}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java index b94f55e..df09ca6 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java @@ -243,6 +243,8 @@ import org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionWithF import org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceWithFlagsDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRepeatDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringReplaceDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringReplaceWithLimitDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringSplitDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringStartsWithDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringToCodePointDescriptor; @@ -571,6 +573,8 @@ public final class FunctionCollection { fc.addGenerated(StringRTrim2Descriptor.FACTORY); fc.addGenerated(StringPositionDescriptor.FACTORY); fc.addGenerated(StringRepeatDescriptor.FACTORY); + fc.addGenerated(StringReplaceDescriptor.FACTORY); + fc.addGenerated(StringReplaceWithLimitDescriptor.FACTORY); fc.addGenerated(StringSplitDescriptor.FACTORY); // Constructors 
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c8ba268f/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 86a6f9c..8fdcd83 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -19,7 +19,8 @@
 package org.apache.hyracks.data.std.primitive;
 
 import java.io.IOException;
-import java.nio.charset.Charset;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.commons.lang3.CharSet;
 import org.apache.hyracks.api.dataflow.value.ITypeTraits;
@@ -161,7 +162,11 @@ public final class UTF8StringPointable extends AbstractPointable implements IHas
 
     @Override
     public String toString() {
-        return new String(this.bytes, this.getCharStartOffset(), this.getUTF8Length(), Charset.forName("UTF-8"));
+        try {
+            return new String(bytes, getCharStartOffset(), getUTF8Length(), StandardCharsets.UTF_8.name());
+        } catch (UnsupportedEncodingException e) {
+            throw new IllegalStateException(e);
+        }
     }
 
     /****
@@ -218,7 +223,7 @@ public final class UTF8StringPointable extends AbstractPointable implements IHas
                 char ch2 = pattern.charAt(pttnStart + c2);
 
                 if (ch1 != ch2) {
-                    if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+                    if (!ignoreCase || Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
                         break;
                     }
                 }
@@ -261,7 +266,7 @@ public final class UTF8StringPointable extends AbstractPointable implements IHas
                 char ch1 = src.charAt(s1Start + c1);
                 char ch2 = pattern.charAt(s2Start + c2);
                 if (ch1 != ch2) {
-                    if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+                    if (!ignoreCase || Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
                         break;
                     }
                 }
@@ -292,7 +297,7 @@ public final class UTF8StringPointable extends AbstractPointable implements IHas
                 char ch2 = pattern.charAt(s2Start + c2);
 
                 if (ch1 != ch2) {
-                    if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+                    if (!ignoreCase || Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
                         break;
                     }
                 }
@@ -582,4 +587,63 @@ public final class UTF8StringPointable extends AbstractPointable implements IHas
         builder.finish();
     }
 
+    /**
+     * Convenience overload of the static {@code findAndReplace} that uses this pointable as the source string.
+     */
+    public boolean findAndReplace(UTF8StringPointable searchPtr, UTF8StringPointable replacePtr, int replaceLimit,
+            UTF8StringBuilder builder, GrowableArray out) throws IOException {
+        return findAndReplace(this, searchPtr, replacePtr, replaceLimit, builder, out);
+    }
+
+    /**
+     * Replaces up to {@code replaceLimit} occurrences of {@code searchPtr} in {@code srcPtr} with
+     * {@code replacePtr}, scanning from the start, and writes the new string through {@code builder} into
+     * {@code out}.
+     *
+     * @return {@code false} if {@code replaceLimit < 1} or no occurrence is found, in which case neither
+     *         {@code builder} nor {@code out} is touched; {@code true} once the replaced string has been written
+     * @throws IOException presumably raised by the builder's write operations
+     */
+    public static boolean findAndReplace(UTF8StringPointable srcPtr, UTF8StringPointable searchPtr,
+            UTF8StringPointable replacePtr, int replaceLimit, UTF8StringBuilder builder, GrowableArray out)
+            throws IOException {
+        if (replaceLimit < 1) {
+            return false;
+        }
+        int curIdx = find(srcPtr, searchPtr, false);
+        if (curIdx < 0) {
+            return false;
+        }
+        int searchUtfLen = searchPtr.getUTF8Length();
+        int replaceUtfLen = replacePtr.getUTF8Length();
+        int srcUtfLen = srcPtr.getUTF8Length();
+        // Size hint for the builder. The product is computed in long and clamped so that a huge estimate cannot
+        // wrap around to a bogus (possibly negative) int when narrowed.
+        int estimatedLen = searchUtfLen > 0 && replaceUtfLen > searchUtfLen
+                ? (int) Math.min(Integer.MAX_VALUE, (long) srcUtfLen * replaceUtfLen / searchUtfLen) : srcUtfLen;
+        builder.reset(out, estimatedLen);
+        // Copy the text before the first match, then the replacement.
+        builder.appendUtf8StringPointable(srcPtr, srcPtr.getCharStartOffset(), curIdx);
+        builder.appendUtf8StringPointable(replacePtr);
+
+        curIdx += searchUtfLen;
+        int limit = replaceLimit - 1;
+
+        int nextIdx;
+        // NOTE(review): the loop stops on nextIdx <= 0 rather than < 0. A later match can only be at offset 0 when
+        // curIdx is still 0, i.e. for an empty search string -- presumably this is what keeps that case from
+        // looping without progress; confirm against find().
+        while (limit > 0 && (nextIdx = find(srcPtr, searchPtr, false, curIdx)) > 0) {
+            builder.appendUtf8StringPointable(srcPtr, srcPtr.getCharStartOffset() + curIdx, nextIdx - curIdx);
+            builder.appendUtf8StringPointable(replacePtr);
+            curIdx = nextIdx + searchUtfLen;
+            limit--;
+        }
+        // Copy whatever follows the last replaced match.
+        builder.appendUtf8StringPointable(srcPtr, srcPtr.getCharStartOffset() + curIdx, srcUtfLen - curIdx);
+
+        builder.finish();
+
+        return true;
+    }
 }