Changeset: d6792323e680 for monetdb-java
URL: https://dev.monetdb.org/hg/monetdb-java?cmd=changeset;node=d6792323e680
Modified Files:
        src/main/java/nl/cwi/monetdb/mcl/protocol/oldmapi/OldMapiTupleLineParser.java
Branch: embedded
Log Message:

Fixed old tuple line protocol parsing


diffs (183 lines):

diff --git a/src/main/java/nl/cwi/monetdb/mcl/protocol/oldmapi/OldMapiTupleLineParser.java b/src/main/java/nl/cwi/monetdb/mcl/protocol/oldmapi/OldMapiTupleLineParser.java
--- a/src/main/java/nl/cwi/monetdb/mcl/protocol/oldmapi/OldMapiTupleLineParser.java
+++ b/src/main/java/nl/cwi/monetdb/mcl/protocol/oldmapi/OldMapiTupleLineParser.java
@@ -68,15 +68,13 @@ final class OldMapiTupleLineParser {
                }
 
                // extract separate fields by examining string, char for char
-               boolean inString = false, escaped = false;
+               boolean inString = false, escaped = false, fieldHasEscape = 
false;
                int cursor = 2, column = 0, i = 2;
                for (; i < len; i++) {
                        switch(array[i]) {
-                               default:
-                                       escaped = false;
-                                       break;
                                case '\\':
                                        escaped = !escaped;
+                                       fieldHasEscape = true;
                                        break;
                                case '"':
                                        /**
@@ -98,88 +96,91 @@ final class OldMapiTupleLineParser {
                                        break;
                                case '\t':
                                        if (!inString && (i > 0 && array[i - 1] 
== ',') || (i + 1 == len - 1 && array[++i] == ']')) { // dirty
-                                               // split!
-                                               if (array[cursor] == '"' && 
array[i - 2] == '"') {
-                                                       // reuse the 
tupleLineBuffer by cleaning it and ensure the capacity
-                                                       tupleLineBuffer.clear();
-                                                       tupleLineBuffer = 
BufferReallocator.ensureCapacity(tupleLineBuffer, (i - 2) - (cursor + 1));
-
-                                                       for (int pos = cursor + 
1; pos < i - 2; pos++) {
-                                                               if 
(array[cursor] == '\\' && pos + 1 < i - 2) {
-                                                                       pos++;
-                                                                       // 
strToStr and strFromStr in gdk_atoms.mx only support \t \n \\ \" and \377
-                                                                       switch 
(array[pos]) {
-                                                                               
case '\\':
-                                                                               
        tupleLineBuffer.put('\\');
-                                                                               
        break;
-                                                                               
case 'n':
-                                                                               
        tupleLineBuffer.put('\n');
-                                                                               
        break;
-                                                                               
case 't':
-                                                                               
        tupleLineBuffer.put('\t');
-                                                                               
        break;
-                                                                               
case '"':
-                                                                               
        tupleLineBuffer.put('"');
-                                                                               
        break;
-                                                                               
case '0': case '1': case '2': case '3':
-                                                                               
        // this could be an octal number, let's check it out
-                                                                               
        if (pos + 2 < i - 2 && array[pos + 1] >= '0' && array[pos + 1] <= '7' &&
-                                                                               
                        array[pos + 2] >= '0' && array[pos + 2] <= '7') {
-                                                                               
                // we got the number!
-                                                                               
                try {
-                                                                               
                        tupleLineBuffer.put((char)(Integer.parseInt("" + 
array[pos] + array[pos + 1] + array[pos + 2], 8)));
-                                                                               
                        pos += 2;
-                                                                               
                } catch (NumberFormatException e) {
-                                                                               
                        // hmmm, this point should never be reached actually...
-                                                                               
                        throw new AssertionError("Flow error, should never try 
to parse non-number");
+                                               // extract the field value as a 
string, without the potential escape codes
+                                               final int endpos = i - 2;       
// minus the tab and the comma or ]
+                                               if (array[cursor] == '"' && 
array[endpos] == '"') {
+                                                       cursor++;
+                                                       final int fieldlen = 
endpos - cursor;
+                                                       if (fieldHasEscape) {
+                                                               // reuse the 
tupleLineBuffer by cleaning it and ensure the capacity
+                                                               
tupleLineBuffer.clear();
+                                                               tupleLineBuffer 
= BufferReallocator.ensureCapacity(tupleLineBuffer, (i - 2) - (cursor + 1));
+                                                               // parse the 
field value (excluding the double quotes) and convert it to a string without 
any escape characters
+                                                               for (int pos = 
cursor; pos < endpos; pos++) {
+                                                                       char 
chr = array[pos];
+                                                                       if (chr 
== '\\' && pos + 1 < endpos) {
+                                                                               
// we detected an escape
+                                                                               
// escapedStr and GDKstrFromStr in gdk_atoms.c only
+                                                                               
// support \\ \f \n \r \t \" and \377
+                                                                               
pos++;
+                                                                               
chr = array[pos];
+                                                                               
switch (chr) {
+                                                                               
        case 'f':
+                                                                               
                tupleLineBuffer.put('\f');
+                                                                               
                break;
+                                                                               
        case 'n':
+                                                                               
                tupleLineBuffer.put('\n');
+                                                                               
                break;
+                                                                               
        case 'r':
+                                                                               
                tupleLineBuffer.put('\r');
+                                                                               
                break;
+                                                                               
        case 't':
+                                                                               
                tupleLineBuffer.put('\t');
+                                                                               
                break;
+                                                                               
        case '0': case '1': case '2': case '3':
+                                                                               
                // this could be an octal number, let's check it out
+                                                                               
                if (pos + 2 < endpos) {
+                                                                               
                        if (pos + 2 < i - 2 && array[pos + 1] >= '0' && 
array[pos + 1] <= '7' && array[pos + 2] >= '0' && array[pos + 2] <= '7') {
+                                                                               
                                // we got an octal number between \000 and \377
+                                                                               
                                try {
+                                                                               
                                        
tupleLineBuffer.put((char)(Integer.parseInt("" + array[pos] + array[pos + 1] + 
array[pos + 2], 8)));
+                                                                               
                                        pos += 2;
+                                                                               
                                } catch (NumberFormatException e) {
+                                                                               
                                        // hmmm, this point should never be 
reached actually...
+                                                                               
                                        throw new AssertionError("Flow error, 
should never try to parse non-number");
+                                                                               
                                }
+                                                                               
                        } else {
+                                                                               
                                // do default action if number seems not to be 
an octal number
+                                                                               
                                tupleLineBuffer.put(array[pos]);
+                                                                               
                        }
+                                                                               
                } else {
+                                                                               
                        // do default action if number seems not to be an octal 
number
+                                                                               
                        tupleLineBuffer.put(array[pos]);
                                                                                
                }
-                                                                               
        } else {
-                                                                               
                // do default action if number seems not to be correct
+                                                                               
                break;
+                                                                               
        default:
+                                                                               
                // this is wrong usage of escape (except for '\\' and '"'), 
just ignore the \-escape and print the char
                                                                                
                tupleLineBuffer.put(array[pos]);
-                                                                               
        }
-                                                                               
        break;
-                                                                               
default:
-                                                                               
        // this is wrong, just ignore the escape, and print the char
-                                                                               
        tupleLineBuffer.put(array[pos]);
-                                                                               
        break;
+                                                                               
                break;
+                                                                               
}
+                                                                       } else {
+                                                                               
tupleLineBuffer.put(array[pos]);
                                                                        }
-                                                               } else 
if(array[pos] == '\\') {
-                                                                       pos++;
-                                                                       switch 
(array[pos]) {
-                                                                               
case '\\':
-                                                                               
        tupleLineBuffer.put('\\');
-                                                                               
        break;
-                                                                               
case 'n':
-                                                                               
        tupleLineBuffer.put('\n');
-                                                                               
        break;
-                                                                               
case 't':
-                                                                               
        tupleLineBuffer.put('\t');
-                                                                               
        break;
-                                                                               
case '"':
-                                                                               
        tupleLineBuffer.put('"');
-                                                                               
        break;
-                                                                       }
-                                                               } else {
-                                                                       
tupleLineBuffer.put(array[pos]);
                                                                }
+                                                               // put the 
unescaped string in the right place
+                                                               
((Buffer)tupleLineBuffer).flip();
+                                                               
oldMapiStringToJavaDataConversion(protocol, tupleLineBuffer.array(), 0, 
tupleLineBuffer.limit(), lineNumber, values[column], typesMap[column]);
+                                                       } else {
+                                                               
oldMapiStringToJavaDataConversion(protocol, array, cursor, fieldlen, 
lineNumber, values[column], typesMap[column]);
                                                        }
-                                                       // put the unescaped 
string in the right place
-                                                       
((Buffer)tupleLineBuffer).flip();
-                                                       
oldMapiStringToJavaDataConversion(protocol, tupleLineBuffer.array(), 0,
-                                                                       
tupleLineBuffer.limit(), lineNumber, values[column], typesMap[column]);
-                                               } else if ((i - 1) - cursor == 
4 && OldMapiTupleLineParserHelper.charIndexOf(array,
-                                                               0,array.length, 
NULL_STRING, 0,4, cursor) == cursor) {
-                                                       
setNullValue(lineNumber, values[column], typesMap[column]);
                                                } else {
-                                                       
oldMapiStringToJavaDataConversion(protocol, array, cursor, i - 1 - cursor, 
lineNumber,
-                                                                       
values[column], typesMap[column]);
+                                                       final int vlen = i - 1 
- cursor;
+                                                       if (vlen == 4 && 
OldMapiTupleLineParserHelper.charIndexOf(array, 0, array.length, NULL_STRING, 
0, 4, cursor) == cursor) {
+                                                               
setNullValue(lineNumber, values[column], typesMap[column]);
+                                                       } else {
+                                                               
oldMapiStringToJavaDataConversion(protocol, array, cursor, vlen, lineNumber, 
values[column], typesMap[column]);
+                                                       }
                                                }
+                                               cursor = i + 1;
+                                               fieldHasEscape = false;         
// reset for next field scan
                                                column++;
-                                               cursor = i + 1;
                                        }
                                        // reset escaped flag
                                        escaped = false;
                                        break;
+                               default:
+                                       escaped = false;
+                                       break;
                        }
                }
                protocol.tupleLineBuffer = tupleLineBuffer;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to