Luo Chen has uploaded a new change for review.

  https://asterix-gerrit.ics.uci.edu/2573

Change subject: [ASTERIXDB-2357] ADMParser Improvements
......................................................................

[ASTERIXDB-2357] ADMParser Improvements

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- The current ADMParser relies heavily on string operations,
which results in a lot of objects being created. This patch optimizes
this by operating directly on char[].

Change-Id: I106b58e79746b0a6f3d8b79473202653341a7009
---
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
M 
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java
M 
asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
M 
hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
10 files changed, 350 insertions(+), 203 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/73/2573/1

diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
index 216cadb..eab8a92 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
@@ -39,7 +39,6 @@
 import org.apache.asterix.external.api.IRecordDataParser;
 import org.apache.asterix.external.api.IStreamDataParser;
 import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.base.AGeometry;
 import org.apache.asterix.om.base.ANull;
 import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
 import org.apache.asterix.om.types.AOrderedListType;
@@ -47,13 +46,13 @@
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.AUnionType;
 import org.apache.asterix.om.types.AUnorderedListType;
-import org.apache.asterix.om.types.BuiltinType;
 import org.apache.asterix.om.types.IAType;
 import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
 import org.apache.asterix.om.types.hierachy.ITypeConvertComputer;
 import org.apache.asterix.om.util.container.IObjectPool;
 import org.apache.asterix.om.util.container.ListObjectPool;
 import org.apache.asterix.runtime.operators.file.adm.AdmLexer;
+import org.apache.asterix.runtime.operators.file.adm.AdmLexer.TokenImage;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IMutableValueStorage;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
@@ -73,6 +72,8 @@
             new ListObjectPool<IAsterixListBuilder, ATypeTag>(new 
ListBuilderFactory());
     private final IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool =
             new ListObjectPool<IMutableValueStorage, ATypeTag>(new 
AbvsBuilderFactory());
+
+    private final TokenImage tmpTokenImage = new TokenImage();
 
     private final String mismatchErrorMessage = "Mismatch Type, expecting a 
value of type ";
     private final String mismatchErrorMessage2 = " got a value of type ";
@@ -256,15 +257,18 @@
                 break;
             case AdmLexer.TOKEN_STRING_LITERAL:
                 if (checkType(ATypeTag.STRING, objectType)) {
-                    String tokenImage =
-                            admLexer.getLastTokenImage().substring(1, 
admLexer.getLastTokenImage().length() - 1);
-                    aString.setValue(admLexer.containsEscapes() ? 
replaceEscapes(tokenImage) : tokenImage);
-                    stringSerde.serialize(aString, out);
+                    admLexer.getLastTokenImage(tmpTokenImage);
+                    if (admLexer.containsEscapes()) {
+                        replaceEscapes(tmpTokenImage);
+                    }
+                    int begin = tmpTokenImage.getBegin() + 1;
+                    int len = tmpTokenImage.getLength() - 2;
+                    parseString(tmpTokenImage.getBuffer(), begin, len, out);
                 } else if (checkType(ATypeTag.UUID, objectType)) {
                     // Dealing with UUID type that is represented by a string
-                    String tokenImage =
-                            admLexer.getLastTokenImage().substring(1, 
admLexer.getLastTokenImage().length() - 1);
-                    aUUID.parseUUIDString(tokenImage);
+                    admLexer.getLastTokenImage(tmpTokenImage);
+                    aUUID.parseUUIDString(tmpTokenImage.getBuffer(), 
tmpTokenImage.getBegin() + 1,
+                            tmpTokenImage.getLength() - 2);
                     uuidSerde.serialize(aUUID, out);
                 } else if (checkType(ATypeTag.GEOMETRY, objectType)) {
                     // Parse the string as a WKT-encoded geometry
@@ -373,13 +377,13 @@
 
     }
 
-    private String replaceEscapes(String tokenImage) throws ParseException {
-        char[] chars = tokenImage.toCharArray();
-        int len = chars.length;
-        int readpos = 0;
-        int writepos = 0;
-        int movemarker = 0;
-        while (readpos < len) {
+    private void replaceEscapes(TokenImage tokenImage) throws ParseException {
+        char[] chars = tokenImage.getBuffer();
+        int end = tokenImage.getBegin() + tokenImage.getLength();
+        int readpos = tokenImage.getBegin();
+        int writepos = tokenImage.getBegin();
+        int movemarker = tokenImage.getBegin();
+        while (readpos < end) {
             if (chars[readpos] == '\\') {
                 moveChars(chars, movemarker, readpos, readpos - writepos);
                 switch (chars[readpos + 1]) {
@@ -416,8 +420,8 @@
             ++writepos;
             ++readpos;
         }
-        moveChars(chars, movemarker, len, readpos - writepos);
-        return new String(chars, 0, len - (readpos - writepos));
+        moveChars(chars, movemarker, end, readpos - writepos);
+        tokenImage.reset(chars, tokenImage.getBegin(), end - (readpos - 
writepos));
     }
 
     private static void moveChars(char[] chars, int start, int end, int 
offset) {
@@ -517,16 +521,16 @@
                     expectingRecordField = false;
 
                     if (recType != null) {
-                        String fldName =
-                                admLexer.getLastTokenImage().substring(1, 
admLexer.getLastTokenImage().length() - 1);
+                        admLexer.getLastTokenImage(tmpTokenImage);
+                        String fldName = new String(tmpTokenImage.getBuffer(), 
tmpTokenImage.getBegin() + 1,
+                                tmpTokenImage.getLength() - 2);
                         fieldId = recBuilder.getFieldId(fldName);
                         if ((fieldId < 0) && !recType.isOpen()) {
                             throw new 
ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_EXTRA_FIELD_IN_CLOSED_RECORD,
                                     fldName);
                         } else if ((fieldId < 0) && recType.isOpen()) {
-                            
aStringFieldName.setValue(admLexer.getLastTokenImage().substring(1,
-                                    admLexer.getLastTokenImage().length() - 
1));
-                            stringSerde.serialize(aStringFieldName, 
fieldNameBuffer.getDataOutput());
+                            parseString(tmpTokenImage.getBuffer(), 
tmpTokenImage.getBegin() + 1,
+                                    tmpTokenImage.getLength() - 2, 
fieldNameBuffer.getDataOutput());
                             openRecordField = true;
                             fieldType = null;
                         } else {
@@ -536,9 +540,9 @@
                             openRecordField = false;
                         }
                     } else {
-                        aStringFieldName.setValue(
-                                admLexer.getLastTokenImage().substring(1, 
admLexer.getLastTokenImage().length() - 1));
-                        stringSerde.serialize(aStringFieldName, 
fieldNameBuffer.getDataOutput());
+                        admLexer.getLastTokenImage(tmpTokenImage);
+                        parseString(tmpTokenImage.getBuffer(), 
tmpTokenImage.getBegin() + 1,
+                                tmpTokenImage.getLength() - 2, 
fieldNameBuffer.getDataOutput());
                         openRecordField = true;
                         fieldType = null;
                     }
@@ -816,7 +820,13 @@
 
     private void parseToNumericTarget(ATypeTag typeTag, IAType objectType, 
DataOutput out) throws IOException {
         ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
-        if ((targetTypeTag == null) || 
!parseValue(admLexer.getLastTokenImage(), targetTypeTag, out)) {
+        boolean parsed = false;
+        if (targetTypeTag != null) {
+            admLexer.getLastTokenImage(tmpTokenImage);
+            parsed = parseValue(tmpTokenImage.getBuffer(), 
tmpTokenImage.getBegin(), tmpTokenImage.getLength(),
+                    targetTypeTag, out);
+        }
+        if (!parsed) {
             throw new ParseException(mismatchErrorMessage + 
objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
         }
     }
@@ -828,8 +838,13 @@
             castBuffer.reset();
             dataOutput = castBuffer.getDataOutput();
         }
-
-        if ((targetTypeTag == null) || 
!parseValue(admLexer.getLastTokenImage(), typeTag, dataOutput)) {
+        boolean parsed = false;
+        if (targetTypeTag != null) {
+            admLexer.getLastTokenImage(tmpTokenImage);
+            parsed = parseValue(tmpTokenImage.getBuffer(), 
tmpTokenImage.getBegin(), tmpTokenImage.getLength(), typeTag,
+                    dataOutput);
+        }
+        if (!parsed) {
             throw new ParseException(mismatchErrorMessage + 
objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
         }
 
@@ -871,9 +886,11 @@
             if (token == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
                 token = admLexer.next();
                 if (token == AdmLexer.TOKEN_STRING_LITERAL) {
-                    String unquoted =
-                            admLexer.getLastTokenImage().substring(1, 
admLexer.getLastTokenImage().length() - 1);
-                    if (!parseValue(unquoted, typeTag, dataOutput)) {
+                    admLexer.getLastTokenImage(tmpTokenImage);
+                    int begin = tmpTokenImage.getBegin() + 1;
+                    int len = tmpTokenImage.getLength() - 2;
+                    // unquoted value
+                    if (!parseValue(tmpTokenImage.getBuffer(), begin, len, 
typeTag, dataOutput)) {
                         throw new 
ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_CONSTRUCTOR_MISSING_DESERIALIZER,
                                 AdmLexer.tokenKindToString(token));
                     }
@@ -899,85 +916,86 @@
                 objectType.getTypeName() + " got " + typeTag);
     }
 
-    private boolean parseValue(String unquoted, ATypeTag typeTag, DataOutput 
out) throws HyracksDataException {
+    private boolean parseValue(char[] buffer, int begin, int len, ATypeTag 
typeTag, DataOutput out)
+            throws HyracksDataException {
         switch (typeTag) {
             case BOOLEAN:
-                parseBoolean(unquoted, out);
+                parseBoolean(buffer, begin, len, out);
                 return true;
             case TINYINT:
-                parseInt8(unquoted, out);
+                parseInt8(buffer, begin, len, out);
                 return true;
             case SMALLINT:
-                parseInt16(unquoted, out);
+                parseInt16(buffer, begin, len, out);
                 return true;
             case INTEGER:
-                parseInt32(unquoted, out);
+                parseInt32(buffer, begin, len, out);
                 return true;
             case BIGINT:
-                parseInt64(unquoted, out);
+                parseInt64(buffer, begin, len, out);
                 return true;
             case FLOAT:
-                if ("INF".equals(unquoted)) {
+                if (matches("INF", buffer, begin, len)) {
                     aFloat.setValue(Float.POSITIVE_INFINITY);
-                } else if ("-INF".equals(unquoted)) {
+                } else if (matches("-INF", buffer, begin, len)) {
                     aFloat.setValue(Float.NEGATIVE_INFINITY);
                 } else {
-                    aFloat.setValue(Float.parseFloat(unquoted));
+                    aFloat.setValue(parseFloat(buffer, begin, len));
                 }
                 floatSerde.serialize(aFloat, out);
                 return true;
             case DOUBLE:
-                if ("INF".equals(unquoted)) {
+                if (matches("INF", buffer, begin, len)) {
                     aDouble.setValue(Double.POSITIVE_INFINITY);
-                } else if ("-INF".equals(unquoted)) {
+                } else if (matches("-INF", buffer, begin, len)) {
                     aDouble.setValue(Double.NEGATIVE_INFINITY);
                 } else {
-                    aDouble.setValue(Double.parseDouble(unquoted));
+                    aDouble.setValue(parseDouble(buffer, begin, len));
                 }
                 doubleSerde.serialize(aDouble, out);
                 return true;
             case STRING:
-                aString.setValue(unquoted);
-                stringSerde.serialize(aString, out);
+                parseString(buffer, begin, len, out);
                 return true;
             case TIME:
-                parseTime(unquoted, out);
+                parseTime(buffer, begin, len, out);
                 return true;
             case DATE:
-                parseDate(unquoted, out);
+                parseDate(buffer, begin, len, out);
                 return true;
             case DATETIME:
-                parseDateTime(unquoted, out);
+                parseDateTime(buffer, begin, len, out);
                 return true;
             case DURATION:
-                parseDuration(unquoted, out);
+                parseDuration(buffer, begin, len, out);
                 return true;
             case DAYTIMEDURATION:
-                parseDateTimeDuration(unquoted, out);
+                parseDateTimeDuration(buffer, begin, len, out);
                 return true;
             case YEARMONTHDURATION:
-                parseYearMonthDuration(unquoted, out);
+                parseYearMonthDuration(buffer, begin, len, out);
                 return true;
             case POINT:
-                parsePoint(unquoted, out);
+                parsePoint(buffer, begin, len, out);
                 return true;
             case POINT3D:
-                parse3DPoint(unquoted, out);
+                parse3DPoint(buffer, begin, len, out);
                 return true;
             case CIRCLE:
-                parseCircle(unquoted, out);
+                parseCircle(buffer, begin, len, out);
                 return true;
             case RECTANGLE:
-                parseRectangle(unquoted, out);
+                parseRectangle(buffer, begin, len, out);
                 return true;
             case LINE:
-                parseLine(unquoted, out);
+                parseLine(buffer, begin, len, out);
                 return true;
             case POLYGON:
-                APolygonSerializerDeserializer.parse(unquoted, out);
+                //TODO: optimize
+                APolygonSerializerDeserializer.parse(new String(buffer, begin, 
len), out);
                 return true;
             case UUID:
-                aUUID.parseUUIDString(unquoted);
+                aUUID.parseUUIDString(buffer, begin, len);
                 uuidSerde.serialize(aUUID, out);
                 return true;
             default:
@@ -985,32 +1003,43 @@
         }
     }
 
-    private void parseBoolean(String bool, DataOutput out) throws 
HyracksDataException {
-        if (bool.equals("true")) {
+    private boolean matches(String value, char[] buffer, int begin, int len) {
+        if (len != value.length()) {
+            return false;
+        }
+        for (int i = 0; i < len; i++) {
+            if (value.charAt(i) != buffer[i + begin]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private void parseBoolean(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
+        if (matches("true", buffer, begin, len)) {
             booleanSerde.serialize(ABoolean.TRUE, out);
-        } else if (bool.equals("false")) {
+        } else if (matches("false", buffer, begin, len)) {
             booleanSerde.serialize(ABoolean.FALSE, out);
         } else {
             throw new 
ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "boolean");
         }
     }
 
-    private void parseInt8(String int8, DataOutput out) throws 
HyracksDataException {
+    private void parseInt8(char[] buffer, int begin, int len, DataOutput out) 
throws HyracksDataException {
         boolean positive = true;
         byte value = 0;
-        int offset = 0;
+        int offset = begin;
 
-        if (int8.charAt(offset) == '+') {
+        if (buffer[offset] == '+') {
             offset++;
-        } else if (int8.charAt(offset) == '-') {
+        } else if (buffer[offset] == '-') {
             offset++;
             positive = false;
         }
-        for (; offset < int8.length(); offset++) {
-            if ((int8.charAt(offset) >= '0') && (int8.charAt(offset) <= '9')) {
-                value = (byte) (((value * 10) + int8.charAt(offset)) - '0');
-            } else if ((int8.charAt(offset) == 'i') && (int8.charAt(offset + 
1) == '8')
-                    && ((offset + 2) == int8.length())) {
+        for (; offset < begin + len; offset++) {
+            if ((buffer[offset] >= '0') && (buffer[offset] <= '9')) {
+                value = (byte) (((value * 10) + buffer[offset]) - '0');
+            } else if (buffer[offset] == 'i' && buffer[offset + 1] == '8' && 
offset + 2 == begin + len) {
                 break;
             } else {
                 throw new 
ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int8");
@@ -1026,22 +1055,22 @@
         int8Serde.serialize(aInt8, out);
     }
 
-    private void parseInt16(String int16, DataOutput out) throws 
HyracksDataException {
+    private void parseInt16(char[] buffer, int begin, int len, DataOutput out) 
throws HyracksDataException {
         boolean positive = true;
         short value = 0;
-        int offset = 0;
+        int offset = begin;
 
-        if (int16.charAt(offset) == '+') {
+        if (buffer[offset] == '+') {
             offset++;
-        } else if (int16.charAt(offset) == '-') {
+        } else if (buffer[offset] == '-') {
             offset++;
             positive = false;
         }
-        for (; offset < int16.length(); offset++) {
-            if ((int16.charAt(offset) >= '0') && (int16.charAt(offset) <= 
'9')) {
-                value = (short) (((value * 10) + int16.charAt(offset)) - '0');
-            } else if ((int16.charAt(offset) == 'i') && (int16.charAt(offset + 
1) == '1')
-                    && (int16.charAt(offset + 2) == '6') && ((offset + 3) == 
int16.length())) {
+        for (; offset < begin + len; offset++) {
+            if (buffer[offset] >= '0' && buffer[offset] <= '9') {
+                value = (short) ((value * 10) + buffer[offset] - '0');
+            } else if (buffer[offset] == 'i' && buffer[offset + 1] == '1' && 
buffer[offset + 2] == '6'
+                    && offset + 3 == begin + len) {
                 break;
             } else {
                 throw new 
ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int16");
@@ -1057,22 +1086,22 @@
         int16Serde.serialize(aInt16, out);
     }
 
-    private void parseInt32(String int32, DataOutput out) throws 
HyracksDataException {
+    private void parseInt32(char[] buffer, int begin, int len, DataOutput out) 
throws HyracksDataException {
         boolean positive = true;
         int value = 0;
-        int offset = 0;
+        int offset = begin;
 
-        if (int32.charAt(offset) == '+') {
+        if (buffer[offset] == '+') {
             offset++;
-        } else if (int32.charAt(offset) == '-') {
+        } else if (buffer[offset] == '-') {
             offset++;
             positive = false;
         }
-        for (; offset < int32.length(); offset++) {
-            if ((int32.charAt(offset) >= '0') && (int32.charAt(offset) <= 
'9')) {
-                value = (((value * 10) + int32.charAt(offset)) - '0');
-            } else if ((int32.charAt(offset) == 'i') && (int32.charAt(offset + 
1) == '3')
-                    && (int32.charAt(offset + 2) == '2') && ((offset + 3) == 
int32.length())) {
+        for (; offset < begin + len; offset++) {
+            if (buffer[offset] >= '0' && buffer[offset] <= '9') {
+                value = (value * 10) + buffer[offset] - '0';
+            } else if (buffer[offset] == 'i' && buffer[offset + 1] == '3' && 
buffer[offset + 2] == '2'
+                    && offset + 3 == begin + len) {
                 break;
             } else {
                 throw new 
ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int32");
@@ -1089,22 +1118,22 @@
         int32Serde.serialize(aInt32, out);
     }
 
-    private void parseInt64(String int64, DataOutput out) throws 
HyracksDataException {
+    private void parseInt64(char[] buffer, int begin, int len, DataOutput out) 
throws HyracksDataException {
         boolean positive = true;
         long value = 0;
-        int offset = 0;
+        int offset = begin;
 
-        if (int64.charAt(offset) == '+') {
+        if (buffer[offset] == '+') {
             offset++;
-        } else if (int64.charAt(offset) == '-') {
+        } else if (buffer[offset] == '-') {
             offset++;
             positive = false;
         }
-        for (; offset < int64.length(); offset++) {
-            if ((int64.charAt(offset) >= '0') && (int64.charAt(offset) <= 
'9')) {
-                value = (((value * 10) + int64.charAt(offset)) - '0');
-            } else if ((int64.charAt(offset) == 'i') && (int64.charAt(offset + 
1) == '6')
-                    && (int64.charAt(offset + 2) == '4') && ((offset + 3) == 
int64.length())) {
+        for (; offset < begin + len; offset++) {
+            if (buffer[offset] >= '0' && buffer[offset] <= '9') {
+                value = (value * 10) + buffer[offset] - '0';
+            } else if (buffer[offset] == 'i' && buffer[offset + 1] == '6' && 
buffer[offset + 2] == '4'
+                    && offset + 3 == begin + len) {
                 break;
             } else {
                 throw new 
ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int64");
@@ -1136,4 +1165,4 @@
         admLexer.reInit(new InputStreamReader(in));
         return true;
     }
-}
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
index 88fcc8d..ed90a28 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
@@ -19,7 +19,9 @@
 package org.apache.asterix.external.parser;
 
 import java.io.DataOutput;
+import java.io.IOException;
 
+import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.external.api.IDataParser;
 import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
 import org.apache.asterix.om.base.ABinary;
@@ -73,11 +75,14 @@
 import 
org.apache.asterix.om.base.temporal.ADurationParserFactory.ADurationParseOption;
 import org.apache.asterix.om.base.temporal.ATimeParserFactory;
 import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
+import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.BuiltinType;
 import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.util.bytes.Base64Parser;
 import org.apache.hyracks.util.bytes.HexParser;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
 
 /**
  * Base class for data parsers. Includes the common set of definitions for
@@ -91,8 +96,10 @@
     protected AMutableInt64 aInt64 = new AMutableInt64(0);
     protected AMutableDouble aDouble = new AMutableDouble(0);
     protected AMutableFloat aFloat = new AMutableFloat(0);
+    @Deprecated
     protected AMutableString aString = new AMutableString("");
     protected AMutableBinary aBinary = new AMutableBinary(null, 0, 0);
+    @Deprecated
     protected AMutableString aStringFieldName = new AMutableString("");
     protected AMutableUUID aUUID = new AMutableUUID();
     protected AMutableGeometry aGeomtry = new AMutableGeometry(null);
@@ -115,6 +122,7 @@
     @SuppressWarnings("unchecked")
     protected ISerializerDeserializer<ADouble> doubleSerde =
             
SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ADOUBLE);
+    @Deprecated
     @SuppressWarnings("unchecked")
     protected ISerializerDeserializer<AString> stringSerde =
             
SerializerDeserializerProvider.INSTANCE.getAStringSerializerDeserializer();
@@ -142,6 +150,9 @@
     @SuppressWarnings("unchecked")
     protected ISerializerDeserializer<ANull> nullSerde =
             
SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ANULL);
+
+    protected final AStringSerializerDeserializer untaggedStringSerde =
+            new AStringSerializerDeserializer(new UTF8StringWriter(), new 
UTF8StringReader());
 
     protected final HexParser hexParser = new HexParser();
     protected final Base64Parser base64Parser = new Base64Parser();
@@ -201,14 +212,14 @@
         this.filename = filename;
     }
 
-    protected void parseTime(String time, DataOutput out) throws 
HyracksDataException {
-        int chrononTimeInMs = ATimeParserFactory.parseTimePart(time, 0, 
time.length());
+    protected void parseTime(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
+        int chrononTimeInMs = ATimeParserFactory.parseTimePart(buffer, begin, 
len);
         aTime.setValue(chrononTimeInMs);
         timeSerde.serialize(aTime, out);
     }
 
-    protected void parseDate(String date, DataOutput out) throws 
HyracksDataException {
-        long chrononTimeInMs = ADateParserFactory.parseDatePart(date, 0, 
date.length());
+    protected void parseDate(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
+        long chrononTimeInMs = ADateParserFactory.parseDatePart(buffer, begin, 
len);
         short temp = 0;
         if (chrononTimeInMs < 0 && chrononTimeInMs % 
GregorianCalendarSystem.CHRONON_OF_DAY != 0) {
             temp = 1;
@@ -217,84 +228,90 @@
         dateSerde.serialize(aDate, out);
     }
 
-    protected void parseDateTime(String datetime, DataOutput out) throws 
HyracksDataException {
+    protected void parseDateTime(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
         // +1 if it is negative (-)
-        short timeOffset = (short) ((datetime.charAt(0) == '-') ? 1 : 0);
 
-        timeOffset += 8;
+        int timeOffset = (buffer[begin] == '-') ? 1 : 0;
 
-        if (datetime.charAt(timeOffset) != 'T') {
+        timeOffset = timeOffset + 8 + begin;
+
+        if (buffer[timeOffset] != 'T') {
             timeOffset += 2;
-            if (datetime.charAt(timeOffset) != 'T') {
+            if (buffer[timeOffset] != 'T') {
                 throw new HyracksDataException("This can not be an instance of 
datetime: missing T");
             }
         }
-        long chrononTimeInMs = ADateParserFactory.parseDatePart(datetime, 0, 
timeOffset);
-        chrononTimeInMs +=
-                ATimeParserFactory.parseTimePart(datetime, timeOffset + 1, 
datetime.length() - timeOffset - 1);
+        long chrononTimeInMs = ADateParserFactory.parseDatePart(buffer, begin, 
timeOffset - begin);
+        chrononTimeInMs += ATimeParserFactory.parseTimePart(buffer, timeOffset 
+ 1, begin + len - timeOffset - 1);
         aDateTime.setValue(chrononTimeInMs);
         datetimeSerde.serialize(aDateTime, out);
     }
 
-    protected void parseDuration(String duration, DataOutput out) throws 
HyracksDataException {
-        ADurationParserFactory.parseDuration(duration, 0, duration.length(), 
aDuration, ADurationParseOption.All);
+    protected void parseDuration(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
+        ADurationParserFactory.parseDuration(buffer, begin, len, aDuration, 
ADurationParseOption.All);
         durationSerde.serialize(aDuration, out);
     }
 
-    protected void parseDateTimeDuration(String durationString, DataOutput 
out) throws HyracksDataException {
-        ADurationParserFactory.parseDuration(durationString, 0, 
durationString.length(), aDayTimeDuration,
-                ADurationParseOption.All);
+    protected void parseDateTimeDuration(char[] buffer, int begin, int len, 
DataOutput out)
+            throws HyracksDataException {
+        ADurationParserFactory.parseDuration(buffer, begin, len, 
aDayTimeDuration, ADurationParseOption.All);
         dayTimeDurationSerde.serialize(aDayTimeDuration, out);
     }
 
-    protected void parseYearMonthDuration(String durationString, DataOutput 
out) throws HyracksDataException {
-        ADurationParserFactory.parseDuration(durationString, 0, 
durationString.length(), aYearMonthDuration,
-                ADurationParseOption.All);
+    protected void parseYearMonthDuration(char[] buffer, int begin, int len, 
DataOutput out)
+            throws HyracksDataException {
+        ADurationParserFactory.parseDuration(buffer, begin, len, 
aYearMonthDuration, ADurationParseOption.All);
         yearMonthDurationSerde.serialize(aYearMonthDuration, out);
     }
 
-    protected void parsePoint(String point, DataOutput out) throws 
HyracksDataException {
+    protected void parsePoint(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
         try {
-            aPoint.setValue(Double.parseDouble(point.substring(0, 
point.indexOf(','))),
-                    Double.parseDouble(point.substring(point.indexOf(',') + 1, 
point.length())));
+            int commaIndex = indexOf(buffer, begin, len, ',');
+            aPoint.setValue(parseDouble(buffer, begin, commaIndex - begin),
+                    parseDouble(buffer, commaIndex + 1, begin + len - 
commaIndex - 1));
             pointSerde.serialize(aPoint, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(point + " can not be an instance of 
point");
+        } catch (Exception e) {
+            throw new HyracksDataException(new String(buffer, begin, len) + " 
can not be an instance of point");
         }
     }
 
-    protected void parse3DPoint(String point3d, DataOutput out) throws 
HyracksDataException {
+    protected void parse3DPoint(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
         try {
-            int firstCommaIndex = point3d.indexOf(',');
-            int secondCommaIndex = point3d.indexOf(',', firstCommaIndex + 1);
-            aPoint3D.setValue(Double.parseDouble(point3d.substring(0, 
firstCommaIndex)),
-                    Double.parseDouble(point3d.substring(firstCommaIndex + 1, 
secondCommaIndex)),
-                    Double.parseDouble(point3d.substring(secondCommaIndex + 1, 
point3d.length())));
+            int firstCommaIndex = indexOf(buffer, begin, len, ',');
+            int secondCommaIndex = indexOf(buffer, firstCommaIndex + 1, begin 
+ len - firstCommaIndex - 1, ',');
+            aPoint3D.setValue(parseDouble(buffer, begin, firstCommaIndex - 
begin),
+                    parseDouble(buffer, firstCommaIndex + 1, secondCommaIndex 
- firstCommaIndex - 1),
+                    parseDouble(buffer, secondCommaIndex + 1, begin + len - 
secondCommaIndex - 1));
             point3DSerde.serialize(aPoint3D, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(point3d + " can not be an instance 
of point3d");
+        } catch (Exception e) {
+            throw new HyracksDataException(new String(buffer, begin, len) + " 
can not be an instance of point3d");
         }
     }
 
-    protected void parseCircle(String circle, DataOutput out) throws 
HyracksDataException {
+    protected void parseCircle(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
         try {
-            String[] parts = circle.split(" ");
-            aPoint.setValue(Double.parseDouble(parts[0].split(",")[0]), 
Double.parseDouble(parts[0].split(",")[1]));
-            aCircle.setValue(aPoint, Double.parseDouble(parts[1].substring(0, 
parts[1].length())));
+            int firstCommaIndex = indexOf(buffer, begin, len, ',');
+            int spaceIndex = indexOf(buffer, firstCommaIndex + 1, begin + len 
- firstCommaIndex - 1, ' ');
+            aPoint.setValue(parseDouble(buffer, begin, firstCommaIndex - 
begin),
+                    parseDouble(buffer, firstCommaIndex + 1, spaceIndex - 
firstCommaIndex - 1));
+            aCircle.setValue(aPoint, parseDouble(buffer, spaceIndex + 1, begin 
+ len - spaceIndex - 1));
             circleSerde.serialize(aCircle, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(circle + " can not be an instance 
of circle");
+        } catch (Exception e) {
+            throw new HyracksDataException(new String(buffer, begin, len) + " 
can not be an instance of circle");
         }
     }
 
-    protected void parseRectangle(String rectangle, DataOutput out) throws 
HyracksDataException {
+    protected void parseRectangle(char[] buffer, int begin, int len, 
DataOutput out) throws HyracksDataException {
         try {
-            String[] points = rectangle.split(" ");
-            if (points.length != 2) {
-                throw new HyracksDataException("rectangle consists of only 2 
points.");
-            }
-            aPoint.setValue(Double.parseDouble(points[0].split(",")[0]), 
Double.parseDouble(points[0].split(",")[1]));
-            aPoint2.setValue(Double.parseDouble(points[1].split(",")[0]), 
Double.parseDouble(points[1].split(",")[1]));
+            int spaceIndex = indexOf(buffer, begin, len, ' ');
+
+            int firstCommaIndex = indexOf(buffer, begin, len, ',');
+            aPoint.setValue(parseDouble(buffer, begin, firstCommaIndex - 
begin),
+                    parseDouble(buffer, firstCommaIndex + 1, spaceIndex - 
firstCommaIndex - 1));
+
+            int secondCommaIndex = indexOf(buffer, spaceIndex + 1, begin + len 
- spaceIndex - 1, ',');
+            aPoint2.setValue(parseDouble(buffer, spaceIndex + 1, 
secondCommaIndex - spaceIndex - 1),
+                    parseDouble(buffer, secondCommaIndex + 1, begin + len - 
secondCommaIndex - 1));
             if (aPoint.getX() > aPoint2.getX() && aPoint.getY() > 
aPoint2.getY()) {
                 aRectangle.setValue(aPoint2, aPoint);
             } else if (aPoint.getX() < aPoint2.getX() && aPoint.getY() < 
aPoint2.getY()) {
@@ -304,23 +321,24 @@
                         "Rectangle arugment must be either (bottom left point, 
top right point) or (top right point, bottom left point)");
             }
             rectangleSerde.serialize(aRectangle, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(rectangle + " can not be an 
instance of rectangle");
+        } catch (Exception e) {
+            throw new HyracksDataException(new String(buffer, begin, len) + " 
can not be an instance of rectangle");
         }
     }
 
-    protected void parseLine(String line, DataOutput out) throws 
HyracksDataException {
+    protected void parseLine(char[] buffer, int begin, int len, DataOutput 
out) throws HyracksDataException {
         try {
-            String[] points = line.split(" ");
-            if (points.length != 2) {
-                throw new HyracksDataException("line consists of only 2 
points.");
-            }
-            aPoint.setValue(Double.parseDouble(points[0].split(",")[0]), 
Double.parseDouble(points[0].split(",")[1]));
-            aPoint2.setValue(Double.parseDouble(points[1].split(",")[0]), 
Double.parseDouble(points[1].split(",")[1]));
+            int spaceIndex = indexOf(buffer, begin, len, ' ');
+            int firstCommaIndex = indexOf(buffer, begin, len, ',');
+            aPoint.setValue(parseDouble(buffer, begin, firstCommaIndex - 
begin),
+                    parseDouble(buffer, firstCommaIndex + 1, spaceIndex - 
firstCommaIndex - 1));
+            int secondCommaIndex = indexOf(buffer, spaceIndex + 1, begin + len 
- spaceIndex - 1, ',');
+            aPoint2.setValue(parseDouble(buffer, spaceIndex + 1, 
secondCommaIndex - spaceIndex - 1),
+                    parseDouble(buffer, secondCommaIndex + 1, begin + len - 
secondCommaIndex - 1));
             aLine.setValue(aPoint, aPoint2);
             lineSerde.serialize(aLine, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(line + " can not be an instance of 
line");
+        } catch (Exception e) {
+            throw new HyracksDataException(new String(buffer, begin, len) + " can 
not be an instance of line");
         }
     }
 
@@ -363,4 +381,34 @@
 
         return ATimeParserFactory.parseTimePart(interval, startOffset, 
endOffset - startOffset + 1);
     }
-}
+
+    protected double parseDouble(char[] buffer, int begin, int len) {
+        // TODO: parse double directly from char[]
+        String str = new String(buffer, begin, len);
+        return Double.valueOf(str);
+    }
+
+    protected float parseFloat(char[] buffer, int begin, int len) {
+        //TODO: parse float directly from char[]
+        String str = new String(buffer, begin, len);
+        return Float.valueOf(str);
+    }
+
+    protected int indexOf(char[] buffer, int begin, int len, char target) {
+        for (int i = begin; i < begin + len; i++) {
+            if (buffer[i] == target) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    protected void parseString(char[] buffer, int begin, int length, 
DataOutput out) throws HyracksDataException {
+        try {
+            out.writeByte(ATypeTag.STRING.serialize());
+            untaggedStringSerde.serialize(buffer, begin, length, out);
+        } catch (IOException e) {
+            throw HyracksDataException.create(e);
+        }
+    }
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
index ce8780d..209ba34 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
@@ -426,26 +426,27 @@
      * @throws IOException
      */
     private void serializeString(ATypeTag stringVariantType, DataOutput out) 
throws IOException {
-        final String stringValue = jsonParser.getText();
+        char[] buffer = jsonParser.getTextCharacters();
+        int begin = jsonParser.getTextOffset();
+        int len = jsonParser.getTextLength();
         final ATypeTag typeToUse = stringVariantType == ATypeTag.ANY ? 
currentToken().getTypeTag() : stringVariantType;
 
         switch (typeToUse) {
             case STRING:
-                aString.setValue(stringValue);
-                stringSerde.serialize(aString, out);
+                parseString(buffer, begin, len, out);
                 break;
             case DATE:
-                parseDate(stringValue, out);
+                parseDate(buffer, begin, len, out);
                 break;
             case DATETIME:
-                parseDateTime(stringValue, out);
+                parseDateTime(buffer, begin, len, out);
                 break;
             case TIME:
-                parseTime(stringValue, out);
+                parseTime(buffer, begin, len, out);
                 break;
             default:
                 throw new RuntimeDataException(ErrorCode.TYPE_UNSUPPORTED, 
jsonParser.currentToken().toString());
 
         }
     }
-}
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java
 
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java
index db85e64..a4e4c33 100644
--- 
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java
+++ 
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java
@@ -27,8 +27,6 @@
 import java.nio.ByteBuffer;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import com.esri.core.geometry.ogc.OGCGeometry;
-import com.esri.core.geometry.ogc.OGCPoint;
 import org.apache.asterix.external.parser.ADMDataParser;
 import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
 import org.apache.asterix.om.base.AGeometry;
@@ -42,12 +40,10 @@
 import org.apache.asterix.om.types.IAType;
 import org.apache.hadoop.io.DataInputByteBuffer;
 import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.dataflow.common.comm.io.FrameDeserializer;
-import org.apache.hyracks.dataflow.common.comm.io.FrameDeserializingDataReader;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
 import org.junit.Assert;
 import org.junit.Test;
+
+import com.esri.core.geometry.ogc.OGCPoint;
 
 import junit.extensions.PA;
 
@@ -55,19 +51,21 @@
 
     @Test
     public void test() throws IOException {
-        String[] dates = { "-9537-08-04", "9656-06-03", "-9537-04-04", 
"9656-06-04", "-9537-10-04", "9626-09-05" };
+        char[][] dates = toChars(
+                new String[] { "-9537-08-04", "9656-06-03", "-9537-04-04", 
"9656-06-04", "-9537-10-04", "9626-09-05" });
         AMutableDate[] parsedDates =
                 new AMutableDate[] { new AMutableDate(-4202630), new 
AMutableDate(2807408), new AMutableDate(-4202752),
                         new AMutableDate(2807409), new AMutableDate(-4202569), 
new AMutableDate(2796544), };
 
-        String[] times = { "12:04:45.689Z", "12:41:59.002Z", "12:10:45.169Z", 
"15:37:48.736Z", "04:16:42.321Z",
-                "12:22:56.816Z" };
+        char[][] times = toChars(new String[] { "12:04:45.689Z", 
"12:41:59.002Z", "12:10:45.169Z", "15:37:48.736Z",
+                "04:16:42.321Z", "12:22:56.816Z" });
         AMutableTime[] parsedTimes =
                 new AMutableTime[] { new AMutableTime(43485689), new 
AMutableTime(45719002), new AMutableTime(43845169),
                         new AMutableTime(56268736), new 
AMutableTime(15402321), new AMutableTime(44576816), };
 
-        String[] dateTimes = { "-2640-10-11T17:32:15.675Z", 
"4104-02-01T05:59:11.902Z", "0534-12-08T08:20:31.487Z",
-                "6778-02-16T22:40:21.653Z", "2129-12-12T13:18:35.758Z", 
"8647-07-01T13:10:19.691Z" };
+        char[][] dateTimes = toChars(
+                new String[] { "-2640-10-11T17:32:15.675Z", 
"4104-02-01T05:59:11.902Z", "0534-12-08T08:20:31.487Z",
+                        "6778-02-16T22:40:21.653Z", 
"2129-12-12T13:18:35.758Z", "8647-07-01T13:10:19.691Z" });
         AMutableDateTime[] parsedDateTimes =
                 new AMutableDateTime[] { new 
AMutableDateTime(-145452954464325L), new AMutableDateTime(67345192751902L),
                         new AMutableDateTime(-45286270768513L), new 
AMutableDateTime(151729886421653L),
@@ -88,24 +86,24 @@
                         while (round++ < 10000) {
                             // Test parseDate.
                             for (int index = 0; index < dates.length; ++index) 
{
-                                PA.invokeMethod(parser, 
"parseDate(java.lang.String, java.io.DataOutput)", dates[index],
-                                        dos);
+                                PA.invokeMethod(parser, "parseDate(char[], 
int, int, java.io.DataOutput)", dates[index],
+                                        0, dates[index].length, dos);
                                 AMutableDate aDate = (AMutableDate) 
PA.getValue(parser, "aDate");
                                 
Assert.assertTrue(aDate.equals(parsedDates[index]));
                             }
 
                             // Tests parseTime.
                             for (int index = 0; index < times.length; ++index) 
{
-                                PA.invokeMethod(parser, 
"parseTime(java.lang.String, java.io.DataOutput)", times[index],
-                                        dos);
+                                PA.invokeMethod(parser, "parseTime(char[], 
int, int, java.io.DataOutput)", times[index],
+                                        0, times[index].length, dos);
                                 AMutableTime aTime = (AMutableTime) 
PA.getValue(parser, "aTime");
                                 
Assert.assertTrue(aTime.equals(parsedTimes[index]));
                             }
 
                             // Tests parseDateTime.
                             for (int index = 0; index < dateTimes.length; 
++index) {
-                                PA.invokeMethod(parser, 
"parseDateTime(java.lang.String, java.io.DataOutput)",
-                                        dateTimes[index], dos);
+                                PA.invokeMethod(parser, "parseDateTime(char[], 
int, int, java.io.DataOutput)",
+                                        dateTimes[index], 0, 
dateTimes[index].length, dos);
                                 AMutableDateTime aDateTime = 
(AMutableDateTime) PA.getValue(parser, "aDateTime");
                                 
Assert.assertTrue(aDateTime.equals(parsedDateTimes[index]));
                             }
@@ -130,6 +128,14 @@
         }
         // Asserts no failure.
         Assert.assertTrue(errorCount.get() == 0);
+    }
+
+    private char[][] toChars(String[] strings) {
+        char[][] results = new char[strings.length][];
+        for (int i = 0; i < strings.length; i++) {
+            results[i] = strings[i].toCharArray();
+        }
+        return results;
     }
 
     @Test
@@ -177,4 +183,4 @@
         }
 
     }
-}
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
 
b/asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
index 3613166..044852b 100644
--- 
a/asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
+++ 
b/asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
@@ -124,6 +124,62 @@
         }
     }
 
+    public void getLastTokenImage(TokenImage image) {
+        if (bufpos >= tokenBegin) {
+            image.reset(buffer, tokenBegin, bufpos - tokenBegin);
+        } else {
+            image.reset(buffer, tokenBegin, bufsize - tokenBegin, buffer, 0, 
bufpos);
+        }
+    }
+
+    public static class TokenImage{
+        private char[] buffer;
+        private int begin;
+        private int length;
+
+        // used to hold circular tokens
+        private char[] tmpBuffer;
+
+        private static final double TMP_BUFFER_GROWTH = 1.5;
+
+        public void reset(char[] buffer, int begin, int length) {
+            this.buffer = buffer;
+            this.begin = begin;
+            this.length = length;
+        }
+
+        public void reset(char[] buffer, int begin, int length, char[] 
extraBuffer, int extraBegin, int extraLength) {
+            ensureTmpBufferSize(length + extraLength);
+            System.arraycopy(buffer, begin, tmpBuffer, 0, length);
+            System.arraycopy(extraBuffer, extraBegin, tmpBuffer, length, 
extraLength);
+            this.buffer = tmpBuffer;
+            this.begin = 0;
+            this.length = length + extraLength;
+        }
+
+        public char[] getBuffer() {
+            return buffer;
+        }
+
+        public int getBegin() {
+            return begin;
+        }
+
+        public int getLength() {
+            return length;
+        }
+
+
+        private void ensureTmpBufferSize(int size) {
+            int oldSize = tmpBuffer!=null?tmpBuffer.length:0;
+            if(oldSize < size) {
+                int newSize = Math.max((int)(oldSize * TMP_BUFFER_GROWTH), 
size);
+                tmpBuffer = new char[newSize];
+            }
+        }
+
+    }
+
     public int getColumn() {
         return column;
     }
@@ -262,4 +318,4 @@
         endOf_UNUSED_Buffer = bufsize;
         tokenBegin = 0;
     }
-}
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java
index 95aef79..f3bf6d7 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java
@@ -24,6 +24,8 @@
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 
 import 
org.apache.asterix.dataflow.data.nontagged.serde.SerializerDeserializerUtil;
 import org.apache.asterix.om.types.ARecordType;
@@ -54,6 +56,7 @@
 
     private int openPartOffset;
     private ARecordType recType;
+    private Map<String, Integer> fieldName2Id;
 
     private final IBinaryHashFunction utf8HashFunction;
     private final IBinaryComparator utf8Comparator;
@@ -69,7 +72,7 @@
     private int[] openFieldNameLengths;
 
     private int numberOfOpenFields;
-    private RuntimeRecordTypeInfo recTypeInfo;
+    private final RuntimeRecordTypeInfo recTypeInfo;
 
     public RecordBuilder() {
         this.closedPartOutputStream = new ByteArrayAccessibleOutputStream();
@@ -120,6 +123,11 @@
             this.isOpen = recType.isOpen();
             this.containsOptionalField = 
NonTaggedFormatUtil.hasOptionalField(recType);
             this.numberOfSchemaFields = recType.getFieldNames().length;
+            this.fieldName2Id = new HashMap<>();
+            String[] fields = recType.getFieldNames();
+            for (int i = 0; i < fields.length; i++) {
+                fieldName2Id.put(fields[i], i);
+            }
         } else {
             this.isOpen = true;
             this.containsOptionalField = false;
@@ -302,12 +310,7 @@
 
     @Override
     public int getFieldId(String fieldName) {
-        for (int i = 0; i < recType.getFieldNames().length; i++) {
-            if (recType.getFieldNames()[i].equals(fieldName)) {
-                return i;
-            }
-        }
-        return -1;
+        return fieldName2Id.getOrDefault(fieldName, -1);
     }
 
     public IBinaryHashFunction getFieldNameHashFunction() {
@@ -317,4 +320,4 @@
     public IBinaryComparator getFieldNameComparator() {
         return utf8Comparator;
     }
-}
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java
index 888b34c..4bb9f08 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java
@@ -69,4 +69,8 @@
             throw HyracksDataException.create(e);
         }
     }
-}
+
+    public void serialize(char[] buffer, int start, int length, DataOutput 
out) throws IOException {
+        UTF8StringUtil.writeUTF8(buffer, start, length, out, utf8StringWriter);
+    }
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java
index 2fb69ab..9a097dc 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java
@@ -23,15 +23,16 @@
 
 public class AMutableUUID extends AUUID {
 
-    public void parseUUIDString(String tokenImage) throws HyracksDataException 
{
-        if (tokenImage.length() != UUID_CHARS) {
-            throw new HyracksDataException("This is not a correct UUID value: 
" + tokenImage);
+    private final byte[] hexBytesBuffer = new byte[UUID_CHARS];
+
+    public void parseUUIDString(char[] buffer, int begin, int len) throws 
HyracksDataException {
+        if (len != UUID_CHARS) {
+            throw new HyracksDataException("This is not a correct UUID value: 
" + new String(buffer, begin, len));
         }
-        byte[] hexBytes = new byte[UUID_CHARS];
-        for (int i = 0; i < tokenImage.length(); i++) {
-            hexBytes[i] = (byte) tokenImage.charAt(i);
+        for (int i = 0; i < len; i++) {
+            hexBytesBuffer[i] = (byte) buffer[begin + i];
         }
-        parseUUIDHexBytes(hexBytes, 0);
+        parseUUIDHexBytes(hexBytesBuffer, 0);
     }
 
     public void parseUUIDHexBytes(byte[] serString, int offset) throws 
HyracksDataException {
@@ -122,4 +123,4 @@
         }
     }
 
-}
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
index 291d963..39f5b3a 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
@@ -63,7 +63,6 @@
      * @throws HyracksDataException
      */
     public static int parseTimePart(String timeString, int start, int length) 
throws HyracksDataException {
-
         int offset = 0;
 
         int hour = 0, min = 0, sec = 0, millis = 0;
@@ -518,4 +517,4 @@
         return timezone;
     }
 
-}
+}
\ No newline at end of file
diff --git 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
 
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 11fb6c0..78fdff1 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -595,7 +595,7 @@
         out.write(tempBytes, 0, count);
     }
 
-    static void writeUTF8(char[] buffer, int start, int length, DataOutput 
out, UTF8StringWriter writer)
+    public static void writeUTF8(char[] buffer, int start, int length, 
DataOutput out, UTF8StringWriter writer)
             throws IOException {
         int utflen = 0;
         int count = 0;

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/2573
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I106b58e79746b0a6f3d8b79473202653341a7009
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Luo Chen <[email protected]>

Reply via email to