>From Janhavi Tripurwar <[email protected]>:
Janhavi Tripurwar has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19828 )
Change subject: WIP: LPAD Function
......................................................................
WIP: LPAD Function
Change-Id: Ia74279fbbe9d23538c057cbbae9538cb219da1f2
---
A
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLpadEvaluator.java
M
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
A
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLpadDescriptor.java
M
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
4 files changed, 266 insertions(+), 0 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/28/19828/1
diff --git
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
index 1250eb9..1c882a2 100644
---
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
@@ -398,6 +398,7 @@
public static final FunctionIdentifier STRING_REPEAT =
FunctionConstants.newAsterix("repeat", 2);
public static final FunctionIdentifier STRING_SPLIT =
FunctionConstants.newAsterix("split", 2);
public static final FunctionIdentifier STRING_PARSE_JSON =
FunctionConstants.newAsterix("parse-json", 1);
+ public static final FunctionIdentifier STRING_LPAD =
FunctionConstants.newAsterix("lpad", 3);
public static final FunctionIdentifier DATASET =
FunctionConstants.newAsterix("dataset",
FunctionIdentifier.VARARGS); // 1, 2 or 3
@@ -1338,6 +1339,8 @@
addFunction(GET_TYPE, AStringTypeComputer.INSTANCE, true);
+ addFunction(STRING_LPAD,
UniformInputTypeComputer.STRING_STRING_INSTANCE, true);
+
addPrivateFunction(EQ, BooleanFunctionTypeComputer.INSTANCE, true);
addPrivateFunction(LE, BooleanFunctionTypeComputer.INSTANCE, true);
addPrivateFunction(GE, BooleanFunctionTypeComputer.INSTANCE, true);
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLpadDescriptor.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLpadDescriptor.java
new file mode 100644
index 0000000..750fe30
--- /dev/null
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLpadDescriptor.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import
org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IEvaluatorContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * Runtime descriptor for the three-argument {@code lpad} function.
+ *
+ * <p>It is registered under {@link BuiltinFunctions#STRING_LPAD} and produces
+ * {@link StringLpadEvaluator} instances that evaluate the three argument
+ * expressions (base string, target length, padding string) per tuple.
+ */
+public class StringLpadDescriptor extends AbstractScalarFunctionDynamicDescriptor {
+    private static final long serialVersionUID = 1L;
+
+    /** Factory used to register this descriptor in the function collection. */
+    public static final IFunctionDescriptorFactory FACTORY = StringLpadDescriptor::new;
+
+    /**
+     * @return the identifier of the {@code lpad} built-in function
+     */
+    @Override
+    public FunctionIdentifier getIdentifier() {
+        return BuiltinFunctions.STRING_LPAD;
+    }
+
+    /**
+     * Builds the evaluator factory for a call site.
+     *
+     * @param args evaluator factories for the three arguments, in call order:
+     *             {@code args[0]} base string, {@code args[1]} target length,
+     *             {@code args[2]} padding string
+     * @return a factory that creates one {@link StringLpadEvaluator} per evaluator context
+     */
+    @Override
+    public IScalarEvaluatorFactory createEvaluatorFactory(IScalarEvaluatorFactory[] args) {
+        return new IScalarEvaluatorFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public IScalarEvaluator createScalarEvaluator(IEvaluatorContext ctx) throws HyracksDataException {
+                // The evaluator is complete on its own, so it is instantiated directly
+                // rather than through an empty anonymous subclass.
+                return new StringLpadEvaluator(ctx, args[0], args[1], args[2],
+                        StringLpadDescriptor.this.getIdentifier(), sourceLoc);
+            }
+        };
+    }
+}
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLpadEvaluator.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLpadEvaluator.java
new file mode 100644
index 0000000..b64aca5
--- /dev/null
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLpadEvaluator.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import org.apache.asterix.om.base.AMutableInt32;
+import org.apache.asterix.om.exceptions.ExceptionUtil;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.runtime.evaluators.common.ArgumentUtils;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IEvaluatorContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.exceptions.SourceLocation;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+/**
+ * Evaluator for {@code lpad(base, targetLength, pad)}.
+ *
+ * <p>The result is {@code base} left-padded with repetitions of {@code pad} until it is
+ * {@code targetLength} characters long. If {@code base} already has more than
+ * {@code targetLength} characters, only its first {@code targetLength} characters are kept.
+ * Characters are counted with {@link UTF8StringUtil#charSize(byte[], int)}, while the length
+ * prefix of the serialized result is the number of content <em>bytes</em>; the two differ as
+ * soon as a multi-byte character is involved, so they are tracked separately here.
+ *
+ * <p>Result for special inputs:
+ * <ul>
+ * <li>any argument missing/null: handled by {@code PointableHelper.checkAndSetMissingOrNull};</li>
+ * <li>{@code base} or {@code pad} not a string, or {@code targetLength} not a bigint: null plus a
+ * type-mismatch warning;</li>
+ * <li>negative {@code targetLength}: null (NOTE(review): confirm the intended semantics);</li>
+ * <li>empty {@code pad} while padding is needed: {@code base} is returned unchanged
+ * (NOTE(review): confirm the intended semantics).</li>
+ * </ul>
+ */
+public class StringLpadEvaluator implements IScalarEvaluator {
+
+    private final IEvaluatorContext ctx;
+
+    private final IScalarEvaluator eval0;
+    private final IScalarEvaluator eval1;
+    private final IScalarEvaluator eval2;
+
+    // Argument pointers.
+    final IPointable argPtrFirst = new VoidPointable();
+    final IPointable argPtrSecond = new VoidPointable();
+    final IPointable argPtrThird = new VoidPointable();
+    private final AMutableInt32 mutableInt = new AMutableInt32(0);
+
+    // For outputting results.
+    private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
+    private final DataOutput out = resultStorage.getDataOutput();
+    // Scratch buffer that receives the encoded length prefix of the result string.
+    private final byte[] tempLengthArray = new byte[5];
+
+    private final FunctionIdentifier funcID;
+    protected final SourceLocation sourceLoc;
+
+    /**
+     * @param context   evaluator context, used to create the argument evaluators and to emit warnings
+     * @param eval0     factory for the base string argument
+     * @param eval1     factory for the target length argument
+     * @param eval2     factory for the padding string argument
+     * @param funcID    identifier reported in warnings
+     * @param sourceLoc source location reported in warnings
+     * @throws HyracksDataException if an argument evaluator cannot be created
+     */
+    public StringLpadEvaluator(IEvaluatorContext context, IScalarEvaluatorFactory eval0,
+            IScalarEvaluatorFactory eval1, IScalarEvaluatorFactory eval2, FunctionIdentifier funcID,
+            SourceLocation sourceLoc) throws HyracksDataException {
+        this.eval0 = eval0.createScalarEvaluator(context);
+        this.eval1 = eval1.createScalarEvaluator(context);
+        this.eval2 = eval2.createScalarEvaluator(context);
+        this.funcID = funcID;
+        this.sourceLoc = sourceLoc;
+        this.ctx = context;
+    }
+
+    /**
+     * Evaluates the three arguments against {@code tuple} and stores the padded/truncated
+     * string (or null, see the class comment) in {@code result}.
+     *
+     * @throws HyracksDataException if an argument evaluator fails, the result would exceed
+     *                              {@link Integer#MAX_VALUE} bytes, or writing the result fails
+     */
+    @Override
+    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
+        resultStorage.reset();
+        // Gets the arguments
+        eval0.evaluate(tuple, argPtrFirst);
+        eval1.evaluate(tuple, argPtrSecond);
+        eval2.evaluate(tuple, argPtrThird);
+
+        if (PointableHelper.checkAndSetMissingOrNull(result, argPtrFirst, argPtrSecond, argPtrThird)) {
+            return;
+        }
+
+        // Base string.
+        byte[] bytes0 = argPtrFirst.getByteArray();
+        int start0 = argPtrFirst.getStartOffset();
+        if (bytes0[start0] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
+            PointableHelper.setNull(result);
+            ExceptionUtil.warnTypeMismatch(ctx, sourceLoc, funcID, bytes0[start0], 0, ATypeTag.STRING);
+            return;
+        }
+
+        // Target length.
+        byte[] bytes1 = argPtrSecond.getByteArray();
+        int start1 = argPtrSecond.getStartOffset();
+        if (bytes1[start1] != ATypeTag.SERIALIZED_INT64_TYPE_TAG) {
+            PointableHelper.setNull(result);
+            ExceptionUtil.warnTypeMismatch(ctx, sourceLoc, funcID, bytes1[start1], 1, ATypeTag.INTEGER);
+            return;
+        }
+        if (!ArgumentUtils.setInteger(ctx, sourceLoc, funcID, 1, bytes1, start1, mutableInt)) {
+            PointableHelper.setNull(result);
+            return;
+        }
+        int targetLength = mutableInt.getIntegerValue();
+
+        // Padding string.
+        byte[] bytes2 = argPtrThird.getByteArray();
+        int start2 = argPtrThird.getStartOffset();
+        if (bytes2[start2] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
+            PointableHelper.setNull(result);
+            ExceptionUtil.warnTypeMismatch(ctx, sourceLoc, funcID, bytes2[start2], 2, ATypeTag.STRING);
+            return;
+        }
+
+        // A negative length cannot be encoded as a string length prefix.
+        if (targetLength < 0) {
+            PointableHelper.setNull(result);
+            return;
+        }
+
+        // Layout of a serialized string: type tag, variable-length byte count, content bytes.
+        int baseByteLen = UTF8StringUtil.getUTFLength(bytes0, start0 + 1);
+        int baseDataOff = start0 + 1 + UTF8StringUtil.getNumBytesToStoreLength(baseByteLen);
+        int baseCharCount = countChars(bytes0, baseDataOff, baseByteLen);
+
+        // Part of the base string that survives (all of it unless truncation is required).
+        int keptChars = Math.min(targetLength, baseCharCount);
+        int keptBytes =
+                keptChars == baseCharCount ? baseByteLen : byteLengthOfChars(bytes0, baseDataOff, keptChars);
+
+        int padByteLen = UTF8StringUtil.getUTFLength(bytes2, start2 + 1);
+        int padDataOff = start2 + 1 + UTF8StringUtil.getNumBytesToStoreLength(padByteLen);
+
+        // Padding = fullCycles whole copies of the pad string + a leading fragment of it.
+        int missingChars = targetLength - keptChars;
+        int fullCycles = 0;
+        int tailBytes = 0;
+        if (missingChars > 0 && padByteLen > 0) {
+            int padCharCount = countChars(bytes2, padDataOff, padByteLen);
+            fullCycles = missingChars / padCharCount;
+            tailBytes = byteLengthOfChars(bytes2, padDataOff, missingChars % padCharCount);
+        }
+
+        // The byte length must be known before any content is written because it is the prefix.
+        long totalBytes = (long) fullCycles * padByteLen + tailBytes + keptBytes;
+        if (totalBytes > Integer.MAX_VALUE) {
+            // NOTE(review): consider the project's dedicated overflow exception, if one exists.
+            throw HyracksDataException
+                    .create(new ArithmeticException("lpad result of " + totalBytes + " bytes is too large"));
+        }
+        int cbytes = UTF8StringUtil.encodeUTF8Length((int) totalBytes, tempLengthArray, 0);
+
+        try {
+            out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+            out.write(tempLengthArray, 0, cbytes);
+            for (int cycle = 0; cycle < fullCycles; cycle++) {
+                out.write(bytes2, padDataOff, padByteLen);
+            }
+            out.write(bytes2, padDataOff, tailBytes);
+            out.write(bytes0, baseDataOff, keptBytes);
+        } catch (IOException e) {
+            throw HyracksDataException.create(e);
+        }
+        result.set(resultStorage);
+    }
+
+    /**
+     * Counts the characters stored in {@code length} content bytes starting at {@code offset},
+     * stepping by {@link UTF8StringUtil#charSize(byte[], int)}.
+     */
+    private static int countChars(byte[] bytes, int offset, int length) {
+        int end = offset + length;
+        int count = 0;
+        for (int pos = offset; pos < end; pos += UTF8StringUtil.charSize(bytes, pos)) {
+            count++;
+        }
+        return count;
+    }
+
+    /**
+     * Returns how many bytes the first {@code charCount} characters starting at {@code offset}
+     * occupy. The caller guarantees that at least {@code charCount} characters are available.
+     */
+    private static int byteLengthOfChars(byte[] bytes, int offset, int charCount) {
+        int pos = offset;
+        for (int i = 0; i < charCount; i++) {
+            pos += UTF8StringUtil.charSize(bytes, pos);
+        }
+        return pos - offset;
+    }
+}
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
index f6ef58f..434a4fc 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
@@ -480,6 +480,7 @@
import org.apache.asterix.runtime.evaluators.functions.StringLengthDescriptor;
import org.apache.asterix.runtime.evaluators.functions.StringLikeDescriptor;
import
org.apache.asterix.runtime.evaluators.functions.StringLowerCaseDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringLpadDescriptor;
import
org.apache.asterix.runtime.evaluators.functions.StringPositionDescriptor;
import
org.apache.asterix.runtime.evaluators.functions.StringPositionOffset1Descriptor;
import org.apache.asterix.runtime.evaluators.functions.StringRTrim2Descriptor;
@@ -1142,6 +1143,7 @@
fc.add(StringReplaceWithLimitDescriptor.FACTORY);
fc.add(StringReverseDescriptor.FACTORY);
fc.add(StringSplitDescriptor.FACTORY);
+ fc.add(StringLpadDescriptor.FACTORY);
// Constructors
fc.add(ABooleanConstructorDescriptor.FACTORY);
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19828
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: ionic
Gerrit-Change-Id: Ia74279fbbe9d23538c057cbbae9538cb219da1f2
Gerrit-Change-Number: 19828
Gerrit-PatchSet: 1
Gerrit-Owner: Janhavi Tripurwar <[email protected]>
Gerrit-MessageType: newchange