Changeset: b9f7c50417ed for monetdb-java URL: https://dev.monetdb.org/hg/monetdb-java/rev/b9f7c50417ed Added Files: src/main/java/org/monetdb/mcl/parser/QuotedStringParser.java Modified Files: ChangeLog src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java tests/JDBC_API_Tester.java Branch: default Log Message:
Properly unescape backslash escapes in response headers Fixes #11 diffs (265 lines): diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -8,3 +8,6 @@ For a complete list of changes in previous monetdb-java releases see: https://www.monetdb.org/downloads/Java/archive/ChangeLog-Archive +* Fri Jan 2 2026 Joeri van Ruth <[email protected]> +- Fix backslash issues in ResultSetMetaData column names. (#11). + diff --git a/src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java b/src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java --- a/src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java +++ b/src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java @@ -30,6 +30,8 @@ public final class HeaderLineParser exte /** The int values found while parsing. Public, you may touch it. */ public final int intValues[]; + private final QuotedStringParser qsparser = new QuotedStringParser(); + /** * Constructs a HeaderLineParser which expects columncount columns. * @@ -146,68 +148,43 @@ public final class HeaderLineParser exte * the name contains a comma or a tab or a space or a # or " or \ escape character. * See issue: https://github.com/MonetDB/MonetDB/issues/3616 * If the parsed name string part has a " as first and last character, - * we remove those added double quotes here. + * we remove those added double quotes here and expand any backslash escapes. * * @param chrLine a character array holding the input data * @param start where the relevant data starts - * @param stop where the relevant data stops + * @param end where the relevant data stops */ - private final void getValues(final char[] chrLine, int start, final int stop) { + private void getValues(final char[] chrLine, int start, final int end) throws MCLParseException { int elem = 0; - boolean inString = false, escaped = false; - for (int i = start; i < stop; i++) { - switch(chrLine[i]) { - case '\\': - escaped = !escaped; - break; - case '"': - /** - * If all strings are wrapped between two quotes, a \" can - * never exist outside a string. Thus if we believe that we - * are not within a string, we can safely assume we're about - * to enter a string if we find a quote. - * If we are in a string we should stop being in a string if - * we find a quote which is not prefixed by a \, for that - * would be an escaped quote. However, a nasty situation can - * occur where the string is like "test \\" as obvious, a - * test for a \ in front of a " doesn't hold here for all - * cases. Because "test \\\"" can exist as well, we need to - * know if a quote is prefixed by an escaping slash or not. - */ - if (!inString) { - inString = true; - } else if (!escaped) { - inString = false; - } - // reset escaped flag - escaped = false; - break; - case ',': - if (!inString && chrLine[i + 1] == '\t') { - // we found the field separator - if (chrLine[start] == '"') - start++; // skip leading double quote - if (elem < values.length) { - // TODO: also deal with escape characters as done in TupleLineParser.parse() - values[elem++] = new String(chrLine, start, i - (chrLine[i - 1] == '"' ? 1 : 0) - start); - } - i++; - start = i + 1; // reset start for the next name, skipping the field separator (a comma and tab) - } - // reset escaped flag - escaped = false; - break; - default: - escaped = false; - break; + int pos = start; + while (pos < end) { + // Extract a value and leave pos at its end + String value; + if (chrLine[pos] == '"') { + value = qsparser.parse(chrLine, pos, end); + pos += qsparser.size; + } else { + int i = pos; + while (i < end && chrLine[i] != ',') + i++; + value = new String(chrLine, pos, i - pos); + pos = i; } + + // Is it a suitable separator (or end?) + if ((pos != end) && (pos > end - 2 || chrLine[pos] != ',' || chrLine[pos + 1] != '\t')) + throw new MCLParseException("invalid separator", pos); + + // Append the value and skip the separator + if (elem >= values.length) + throw new MCLParseException("too many values", pos); + values[elem++] = value; + pos += 2; } - // add the left over part (last column) - if (chrLine[start] == '"') - start++; // skip leading double quote - if (elem < values.length) - values[elem] = new String(chrLine, start, stop - (chrLine[stop - 1] == '"' ? 1 : 0) - start); + + if (elem != values.length) + throw new MCLParseException("not enough values", end); } /** diff --git a/src/main/java/org/monetdb/mcl/parser/QuotedStringParser.java b/src/main/java/org/monetdb/mcl/parser/QuotedStringParser.java new file mode 100644 --- /dev/null +++ b/src/main/java/org/monetdb/mcl/parser/QuotedStringParser.java @@ -0,0 +1,119 @@ +/* + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 2024, 2025 MonetDB Foundation; + * Copyright August 2008 - 2023 MonetDB B.V.; + * Copyright 1997 - July 2008 CWI. + */ + +package org.monetdb.mcl.parser; + +/** + * Utility class to parse quoted strings in MAPI responses. + * + * We need both the unquoted string and the quoted size but + * in Java you can only return a single result so we use + * this little class to hold the other one. + */ +public class QuotedStringParser { + private StringBuilder builder = null; + public int size; + + /** + * Parse a quoted string from the given character array. + * When this method returns, the {@link #size} field indicates the number of + * characters it occupies in the array. + * Because of backslash escapes this may be more than the length of the + * returned string. + * + * This method recognizes the escape characters \\, \", \f, \n, \r, \t and \377. + * The previous implementation ignored invalid escapes, returning them as-is. + * This method throws an exception instead. + * + * @param line array to parse from + * @param start position of the opening quote character + * @param end do not parse beyond this position + * @return the parsed string + * @throws MCLParseException if invalid backslash escapes are found or the + * terminating quote character is missing + */ + public String parse(char[] line, int start, int end) throws MCLParseException { + // Happy path: no backslashes + for (int i = start + 1; i < end; i++) { + char chr = line[i]; + if (chr == '"') { + // size should include the quotes, return value shouldn't + size = i + 1 - start; + return new String(line, start + 1, i - start - 1); + } + if (chr == '\\') { + // fall back to backslash unescaping code + return parseEscapes(line, start, i, end); + } + } + throw new MCLParseException("unterminated quoted string", end); + } + + private String parseEscapes(char[] line, int start, int pos, int end) throws MCLParseException { + if (builder != null) + builder.setLength(0); + else + builder = new StringBuilder(end - start + 16); + builder.append(line, start + 1, pos - start - 1); + + for (int i = pos; i < end; i++) { + char chr = line[i]; + if (chr == '"') { + // We reached the end + size = i + 1 - start; + return builder.toString(); + } + if (chr == '\\') { + // Parse the backslash escape + int location = i; + if (i + 1 == end) + throw new MCLParseException("truncated escape sequence", location); + char second = line[++i]; + switch (second) { + case '\\': + case '"': + chr = second; + break; + case 'f': + chr = '\f'; + break; + case 'n': + chr = '\n'; + break; + case 'r': + chr = '\r'; + break; + case 't': + chr = '\t'; + break; + case '0': + case '1': + case '2': + case '3': + if (i + 3 >= end) + throw new MCLParseException("truncated escape sequence", location); + int digit1 = second - '0'; + int digit2 = line[++i] - '0'; + int digit3 = line[++i] - '0'; + if (digit2 < 0 | digit2 > 7 | digit3 < 0 | digit3 > 7) + throw new MCLParseException("invalid escape sequence", i); + chr = (char)(64 * digit1 + 8 * digit2 + digit3); + break; + default: + throw new MCLParseException("unexpected escape sequence", location); + } + } // end of if (chr=='\\') + builder.append(chr); + } + throw new MCLParseException("unterminated quoted string", end); + } +} diff --git a/tests/JDBC_API_Tester.java b/tests/JDBC_API_Tester.java --- a/tests/JDBC_API_Tester.java +++ b/tests/JDBC_API_Tester.java @@ -6609,7 +6609,7 @@ public final class JDBC_API_Tester { "12. show content of column(s): \"my\"\"double_doublequote\"\n" + "Resultset with 1 columns\n" + " Column Name, Column Label:\n" + - "1 my\\\"double_doublequote my\\\"double_doublequote\n" + + "1 my\"double_doublequote my\"double_doublequote\n" + "Data rows:\n" + "\"my\"\"double_doublequote\"\n" + "my\"\"double_doublequote\n" + @@ -6665,7 +6665,7 @@ public final class JDBC_API_Tester { "5 my tab my tab\n" + "6 my ,tab_comma my ,tab_comma\n" + "7 my, comma_tab my, comma_tab\n" + - "8 my\\\"double_doublequote my\\\"double_doublequote\n" + + "8 my\"double_doublequote my\"double_doublequote\n" + "9 Abc Abc\n" + "10 \n" + "11 123 123\n" + _______________________________________________ checkin-list mailing list -- [email protected] To unsubscribe send an email to [email protected]
