Changeset: b9f7c50417ed for monetdb-java
URL: https://dev.monetdb.org/hg/monetdb-java/rev/b9f7c50417ed
Added Files:
        src/main/java/org/monetdb/mcl/parser/QuotedStringParser.java
Modified Files:
        ChangeLog
        src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java
        tests/JDBC_API_Tester.java
Branch: default
Log Message:

Properly unescape backslash escapes in response headers

Fixes #11


diffs (265 lines):

diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -8,3 +8,6 @@
 For a complete list of changes in previous monetdb-java releases see:
   https://www.monetdb.org/downloads/Java/archive/ChangeLog-Archive
 
+* Fri Jan  2 2026 Joeri van Ruth <[email protected]>
+- Fix backslash issues in ResultSetMetaData column names. (#11).
+
diff --git a/src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java b/src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java
--- a/src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java
+++ b/src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java
@@ -30,6 +30,8 @@ public final class HeaderLineParser exte
         /** The int values found while parsing.  Public, you may touch it. */
        public final int intValues[];
 
+       private final QuotedStringParser qsparser = new QuotedStringParser();
+
        /**
         * Constructs a HeaderLineParser which expects columncount columns.
         *
@@ -146,68 +148,43 @@ public final class HeaderLineParser exte
         * the name contains a comma or a tab or a space or a # or " or \ escape character.
         * See issue: https://github.com/MonetDB/MonetDB/issues/3616
         * If the parsed name string part has a " as first and last character,
-        * we remove those added double quotes here.
+        * we remove those added double quotes here and expand any backslash escapes.
         *
         * @param chrLine a character array holding the input data
         * @param start where the relevant data starts
-        * @param stop where the relevant data stops
+        * @param end where the relevant data stops
         */
-       private final void getValues(final char[] chrLine, int start, final int stop) {
+       private void getValues(final char[] chrLine, int start, final int end) throws MCLParseException {
                int elem = 0;
-               boolean inString = false, escaped = false;
 
-               for (int i = start; i < stop; i++) {
-                       switch(chrLine[i]) {
-                               case '\\':
-                                       escaped = !escaped;
-                                       break;
-                               case '"':
-                                       /**
-                                        * If all strings are wrapped between two quotes, a \" can
-                                        * never exist outside a string. Thus if we believe that we
-                                        * are not within a string, we can safely assume we're about
-                                        * to enter a string if we find a quote.
-                                        * If we are in a string we should stop being in a string if
-                                        * we find a quote which is not prefixed by a \, for that
-                                        * would be an escaped quote. However, a nasty situation can
-                                        * occur where the string is like "test \\" as obvious, a
-                                        * test for a \ in front of a " doesn't hold here for all
-                                        * cases. Because "test \\\"" can exist as well, we need to
-                                        * know if a quote is prefixed by an escaping slash or not.
-                                        */
-                                       if (!inString) {
-                                               inString = true;
-                                       } else if (!escaped) {
-                                               inString = false;
-                                       }
-                                       // reset escaped flag
-                                       escaped = false;
-                                       break;
-                               case ',':
-                                       if (!inString && chrLine[i + 1] == '\t') {
-                                               // we found the field separator
-                                               if (chrLine[start] == '"')
-                                                       start++;  // skip leading double quote
-                                               if (elem < values.length) {
-                                                       // TODO: also deal with escape characters as done in TupleLineParser.parse()
-                                                       values[elem++] = new String(chrLine, start, i - (chrLine[i - 1] == '"' ? 1 : 0) - start);
-                                               }
-                                               i++;
-                                               start = i + 1;  // reset start for the next name, skipping the field separator (a comma and tab)
-                                       }
-                                       // reset escaped flag
-                                       escaped = false;
-                                       break;
-                               default:
-                                       escaped = false;
-                                       break;
+               int pos = start;
+               while (pos < end) {
+                       // Extract a value and leave pos at its end
+                       String value;
+                       if (chrLine[pos] == '"') {
+                               value = qsparser.parse(chrLine, pos, end);
+                               pos += qsparser.size;
+                       } else {
+                               int i = pos;
+                               while (i < end && chrLine[i] != ',')
+                                       i++;
+                               value = new String(chrLine, pos, i - pos);
+                               pos = i;
                        }
+
+                       // Is it a suitable separator (or end?)
+                       if ((pos != end) && (pos > end - 2 || chrLine[pos] != ',' || chrLine[pos + 1] != '\t'))
+                               throw new MCLParseException("invalid separator", pos);
+
+                       // Append the value and skip the separator
+                       if (elem >= values.length)
+                               throw new MCLParseException("too many values", pos);
+                       values[elem++] = value;
+                       pos += 2;
                }
-               // add the left over part (last column)
-               if (chrLine[start] == '"')
-                       start++;  // skip leading double quote
-               if (elem < values.length)
-                       values[elem] = new String(chrLine, start, stop - (chrLine[stop - 1] == '"' ? 1 : 0) - start);
+
+               if (elem != values.length)
+                       throw new MCLParseException("not enough values", end);
        }
 
        /**
diff --git a/src/main/java/org/monetdb/mcl/parser/QuotedStringParser.java b/src/main/java/org/monetdb/mcl/parser/QuotedStringParser.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/org/monetdb/mcl/parser/QuotedStringParser.java
@@ -0,0 +1,119 @@
+/*
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 2024, 2025 MonetDB Foundation;
+ * Copyright August 2008 - 2023 MonetDB B.V.;
+ * Copyright 1997 - July 2008 CWI.
+ */
+
+package org.monetdb.mcl.parser;
+
+/**
+ * Utility class to parse quoted strings in MAPI responses.
+ *
+ * We need both the unquoted string and the quoted size but
+ * in Java you can only return a single result so we use
+ * this little class to hold the other one.
+ */
+public class QuotedStringParser {
+       private StringBuilder builder = null;
+       public int size;
+
+       /**
+        * Parse a quoted string from the given character array.
+        * When this method returns, the {@link #size} field indicates the number of
+        * characters it occupies in the array.
+        * Because of backslash escapes this may be more than the length of the
+        * returned string.
+        *
+        * This method recognizes the escape characters \\, \", \f, \n, \r, \t and \377.
+        * The previous implementation ignored invalid escapes, returning them as-is.
+        * This method throws an exception instead.
+        * This method throws an exception instead.
+        *
+        * @param line array to parse from
+        * @param start position of the opening quote character
+        * @param end do not parse beyond this position
+        * @return the parsed string
+        * @throws MCLParseException if invalid backslash escapes are found or the
+        * terminating quote character is missing
+        */
+       public String parse(char[] line, int start, int end) throws MCLParseException {
+               // Happy path: no backslashes
+               for (int i = start + 1; i < end; i++) {
+                       char chr = line[i];
+                       if (chr == '"') {
+                               // size should include the quotes, return value shouldn't
+                               size = i + 1 - start;
+                               return new String(line, start + 1, i - start - 1);
+                       }
+                       if (chr == '\\') {
+                               // fall back to backslash unescaping code
+                               return parseEscapes(line, start, i, end);
+                       }
+               }
+               throw new MCLParseException("unterminated quoted string", end);
+       }
+
+       private String parseEscapes(char[] line, int start, int pos, int end) throws MCLParseException {
+               if (builder != null)
+                       builder.setLength(0);
+               else
+                       builder = new StringBuilder(end - start + 16);
+               builder.append(line, start + 1, pos - start - 1);
+
+               for (int i = pos; i < end; i++) {
+                       char chr = line[i];
+                       if (chr == '"') {
+                               // We reached the end
+                               size = i + 1 - start;
+                               return builder.toString();
+                       }
+                       if (chr == '\\') {
+                               // Parse the backslash escape
+                               int location = i;
+                               if (i + 1 == end)
+                                       throw new MCLParseException("truncated escape sequence", location);
+                               char second = line[++i];
+                               switch (second) {
+                                       case '\\':
+                                       case '"':
+                                               chr = second;
+                                               break;
+                                       case 'f':
+                                               chr = '\f';
+                                               break;
+                                       case 'n':
+                                               chr = '\n';
+                                               break;
+                                       case 'r':
+                                               chr = '\r';
+                                               break;
+                                       case 't':
+                                               chr = '\t';
+                                               break;
+                                       case '0':
+                                       case '1':
+                                       case '2':
+                                       case '3':
+                                               if (i + 3 >= end)
+                                                       throw new MCLParseException("truncated escape sequence", location);
+                                               int digit1 = second - '0';
+                                               int digit2 = line[++i] - '0';
+                                               int digit3 = line[++i] - '0';
+                                               if (digit2 < 0 | digit2 > 7 | digit3 < 0 | digit3 > 7)
+                                                       throw new MCLParseException("invalid escape sequence", i);
+                                               chr = (char)(64 * digit1 + 8 * digit2 + digit3);
+                                               break;
+                                       default:
+                                               throw new MCLParseException("unexpected escape sequence", location);
+                               }
+                       } // end of if (chr=='\\')
+                       builder.append(chr);
+               }
+               throw new MCLParseException("unterminated quoted string", end);
+       }
+}
diff --git a/tests/JDBC_API_Tester.java b/tests/JDBC_API_Tester.java
--- a/tests/JDBC_API_Tester.java
+++ b/tests/JDBC_API_Tester.java
@@ -6609,7 +6609,7 @@ public final class JDBC_API_Tester {
                                "12. show content of column(s): \"my\"\"double_doublequote\"\n" +
                                "Resultset with 1 columns\n" +
                                "       Column Name, Column Label:\n" +
-                               "1      my\\\"double_doublequote        my\\\"double_doublequote\n" +
+                               "1      my\"double_doublequote  my\"double_doublequote\n" +
                                "Data rows:\n" +
                                "\"my\"\"double_doublequote\"\n" +
                                "my\"\"double_doublequote\n" +
@@ -6665,7 +6665,7 @@ public final class JDBC_API_Tester {
                                "5      my      tab     my      tab\n" +
                                "6      my      ,tab_comma      my      ,tab_comma\n" +
                                "7      my,     comma_tab       my,     comma_tab\n" +
-                               "8      my\\\"double_doublequote        my\\\"double_doublequote\n" +
+                               "8      my\"double_doublequote  my\"double_doublequote\n" +
                                "9      Abc     Abc\n" +
                                "10              \n" +
                                "11     123     123\n" +
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to