Hi Naoto, See another patch attached with Locale.ROOT for the BreakIterator. I will be glad to hear of any feedback. There is another patch [1] around dealing with manifest attributes during application launch. It certainly is related to 6202130 but feels like a distinct set of changes with a slightly different concern. Any opinion as how to proceed with that one? Regards,Philipp
[1] https://mail.openjdk.java.net/pipermail/core-libs-dev/2020-February/064720.html On Mon, 2020-03-23 at 09:05 -0700, naoto.s...@oracle.com wrote: > Hi Philipp, > Right, Locale.ROOT is more appropriate here by definition, though the > implementation is the same. > Naoto > On 3/21/20 5:19 AM, Philipp Kunz wrote: > > Hi Naoto and everyone, > > There are almost as many occurrences of Locale.ROOT as Locale.US > > which made me wonder which one is more appropriately locale- > > independent and which is probably another topic and not actually > > relevant here because BreakIterator.getCharacterInstance is locale- > > agnostic as far as I can tell. > > See attached patch with another attempt to fix bug 6202130. > > Regards,Philipp > > > > On Tue, 2020-03-10 at 10:45 -0700, naoto.s...@oracle.com wrote: > > > Hi Philipp, > > > ..., so using BreakIterator (withLocale.US) is more preferred > > > solution to me. > > > Naoto
diff -r fb44f597981a src/java.base/share/classes/java/util/jar/Attributes.java --- a/src/java.base/share/classes/java/util/jar/Attributes.java Wed Mar 25 09:21:46 2020 -0700 +++ b/src/java.base/share/classes/java/util/jar/Attributes.java Wed Mar 25 19:49:15 2020 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -301,7 +301,6 @@ /* * Writes the current attributes to the specified data output stream. - * XXX Need to handle UTF8 values and break up lines longer than 72 bytes */ void write(DataOutputStream out) throws IOException { StringBuilder buffer = new StringBuilder(72); @@ -319,8 +318,6 @@ * Writes the current attributes to the specified data output stream, * make sure to write out the MANIFEST_VERSION or SIGNATURE_VERSION * attributes first. - * - * XXX Need to handle UTF8 values and break up lines longer than 72 bytes */ void writeMain(DataOutputStream out) throws IOException { StringBuilder buffer = new StringBuilder(72); diff -r fb44f597981a src/java.base/share/classes/java/util/jar/Manifest.java --- a/src/java.base/share/classes/java/util/jar/Manifest.java Wed Mar 25 09:21:46 2020 -0700 +++ b/src/java.base/share/classes/java/util/jar/Manifest.java Wed Mar 25 19:49:15 2020 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,6 +30,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.text.BreakIterator; +import java.util.Locale; import java.util.HashMap; import java.util.Map; @@ -230,30 +232,89 @@ } /** - * Writes {@code line} to {@code out} with line breaks and continuation - * spaces within the limits of 72 bytes of contents per line followed - * by a line break. + * Writes {@code line} to {@code out} with at most 72 bytes per line. + * Byte sequences of characters and grapheme clusters encoded in UTF-8 + * with less than 72 bytes are not interrupted with line breaks. */ static void println72(OutputStream out, String line) throws IOException { - if (!line.isEmpty()) { - byte[] lineBytes = line.getBytes(UTF_8.INSTANCE); - int length = lineBytes.length; - // first line can hold one byte more than subsequent lines which - // start with a continuation line break space - out.write(lineBytes[0]); - int pos = 1; - while (length - pos > 71) { - out.write(lineBytes, pos, 71); - pos += 71; - println(out); - out.write(' '); - } - out.write(lineBytes, pos, length - pos); + int linePos = 0; // number of bytes already put out on current line + BreakIterator characterBoundaryIterator = + BreakIterator.getCharacterInstance(Locale.ROOT); + characterBoundaryIterator.setText(line); + int start = characterBoundaryIterator.first(), end; + while ((end = characterBoundaryIterator.next()) != BreakIterator.DONE) { + String characterString = line.substring(start, end); + start = end; + linePos = printChar72(out, linePos, characterString); } println(out); } /** + * Writes {@code characterString} in one piece on the current line or on a + * new continuation line after a line break unless it exceeds 71 bytes in + * its UTF-8 encoded form. More than 71 bytes cannot fit in a manifest line + * and will be broken across line breaks, reverting to respecting code point + * boundaries. + */ + private static int printChar72(OutputStream out, int linePos, + String characterString) throws IOException { + // characterString contains one grapheme cluster consisting of an + // unlimited number of primitive chars. + byte[] characterBytes = characterString.getBytes(UTF_8.INSTANCE); + int characterLength = characterBytes.length; + int characterPos = 0; // number of bytes of current character + // already put out + + // Put out a break onto a new line if the character or rather grapheme + // cluster does not fit on the current line anymore but fits on a new + // line. + // In other words, only if the current grapheme cluster exceeds 71 bytes + // in its UTF-8 encoded form and therefore does not fit on one whole + // continuation line alone, skip this line break and fill the current + // line first before putting a break inside of the grapheme cluster. + if (linePos + characterLength > 72 && characterLength < 72) { + println(out); + out.write(' '); + linePos = 1; + } + + // Break exceptionally large grapheme clusters that don't fit on one + // line at code point boundaries. + while (linePos + characterLength - characterPos > 72) { + int nextBreakPos = 72 - linePos; + while (isContinuationByte( + characterBytes[characterPos + nextBreakPos])) { + nextBreakPos--; + } + out.write(characterBytes, characterPos, nextBreakPos); + characterPos += nextBreakPos; + println(out); + out.write(' '); + linePos = 1; + } + + int remainder = characterLength - characterPos; + out.write(characterBytes, characterPos, remainder); + linePos += remainder; + return linePos; + } + + /** + * Returns {@code true} if the passed byte as parameter {@code b} + * is not the first (or only) byte of a Unicode character encoded in UTF-8 + * and {@code false} otherwise. + * + * @see <a href="https://tools.ietf.org/html/rfc3629#section-3"> + * RFC 3629 - UTF-8, a transformation format of ISO 10646</a> + * @see StringCoding#isNotContinuation(int) + * @see sun.nio.cs.UTF_8.Decoder#isNotContinuation(int) + */ + private static boolean isContinuationByte(byte b) { + return (b & 0xc0) == 0x80; + } + + /** * Writes a line break to {@code out}. */ static void println(OutputStream out) throws IOException { diff -r fb44f597981a test/jdk/java/util/jar/Manifest/PrintChar72.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/jdk/java/util/jar/Manifest/PrintChar72.java Wed Mar 25 19:49:15 2020 +0100 @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.jar.Attributes; +import java.util.jar.Manifest; +import java.util.jar.Attributes.Name; +import java.util.LinkedList; +import java.util.List; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import org.testng.annotations.*; +import static org.testng.Assert.*; + +/** + * @test + * @bug 6202130 + * @compile ../../../../sun/security/tools/jarsigner/Utils.java + * @run testng PrintChar72 + * @summary Tests {@link Manifest#printChar72} breaking manifest header values + * across lines in conjunction with Unicode characters encoded in UTF-8 with a + * variable number of bytes when reading and writing jar manifests results in + * valid UTF-8. + * <p> + * The manifest line length limit (72 bytes) may be reached at a position + * between multiple bytes of a single UTF-8 encoded character or grapheme + * cluster. Although grapheme clusters should not be broken across lines + * according to the specification the previous Manifest implementation did. + * <p> + * This test makes sure that no Unicode code point character is broken apart + * across a line break when writing manifests and also that manifests are still + * read correctly whether or not characters encoded in UTF-8 with more than one + * byte are interrupted with and continued after a line break for compatibility + * when reading older manifests. + * <p> + * For cases with combining character sequences aka grapheme clusters see + * {@link PrintLine72}. + */ +public class PrintChar72 { + + static final int MANIFEST_LINE_CONTENT_WIDTH_BYTES = 72; + + /** + * Character string that has one byte size in its UTF-8 encoded form to + * yield one byte of position offset. + */ + static final String FILL1BYTE = "x"; + static final String MARK_BEFORE = "y"; + static final String MARK_AFTER = "z"; + + /** + * Four byte name. + * By using header names of four characters length the same values can be + * used for testing line breaks in both headers (in main attributes as well + * as named sections) as well as section names because a named section name + * is represented basically like any other header but follows an empty line + * and the key is always "Name". + * Relative to the start of the value, this way the same offset to the + * character to test breaking can be used in all cases. + */ + static final String FOUR_BYTE_NAME = "Name"; + + /** + * Distinguishes main attributes headers, section names, and headers in + * named sections because an implementation might make a difference. + */ + enum PositionInManifest { + /** + * @see Attributes#writeMain + */ + MAIN_ATTRIBUTES, + /** + * @see Attributes#write + */ + SECTION_NAME, + /** + * @see Manifest#write + */ + NAMED_SECTION; + } + + static String numByteUnicodeCharacter(int numBytes) { + String string; + switch (numBytes) { + case 1: string = "i"; break; + case 2: string = "\u00EF"; break; // small letter i with diaresis + case 3: string = "\uFB00"; break; // small double f ligature + case 4: string = Character.toString(0x2070E); break; // surrogate pair + default: throw new AssertionError(); + } + assertEquals(string.getBytes(UTF_8).length, numBytes, + "self-test failed: unexpected UTF-8 encoded character length"); + return string; + } + + /** + * Produces test cases with all combinations of circumstances covered in + * which a character could possibly be attempted to be broken across a line + * break onto a continuation line:<ul> + * <li>different sizes of a UTF-8 encoded characters: one, two, three, and + * four bytes,</li> + * <li>all possible positions of the character to test breaking with + * relative respect to the 72-byte line length limit including immediately + * before that character and immediately after the character and every + * position in between for multi-byte UTF-8 encoded characters,</li> + * <li>different number of preceding line breaks in the same value</li> + * <li>at the end of the value or followed by another character</li> + * <li>in a main attributes header value, section name, or named section + * header value (see also {@link #PositionInManifest})</li> + * </ul> + * The same set of test parameters is used to write and read manifests + * once without breaking characters apart + * ({@link #testWriteLineBreaksKeepCharactersTogether(int, int, int, int, + * PositionInManifest, String, String)}) and once with doing so + * ({@link #readCharactersBrokenAcrossLines(int, int, int, int, + * PositionInManifest, String, String)}) the latter case of which covering + * backwards compatibility and involves writing manifests like they were + * written before resolution of bug 6202130. + */ + @DataProvider(name = "lineBreakParameters") + public static Object[][] lineBreakParameters() { + return Stream.of(new Object[] { null } + ).flatMap(o -> + // b: number of line breaks before character under test + IntStream.rangeClosed(0, 3).mapToObj( + b -> new Object[] { b }) + ).flatMap(o -> + // c: unicode character UTF-8 encoded length in bytes + IntStream.rangeClosed(1, 4).mapToObj( + c -> new Object[] { o[0], c }) + ).flatMap(o -> + // p: potential break position offset in bytes + // p == 0 => before character, + // p == c => after character, and + // 0 < p < c => position within the character's byte sequence + IntStream.rangeClosed(0, (int) o[1]).mapToObj( + p -> new Object[] { o[0], o[1], p }) + ).flatMap(o -> + // a: no or one character following the one under test + // (a == 0 meaning the character under test is the end of + // the value which is followed by a line break in the + // resulting manifest without continuation line space which + // concludes the value) + IntStream.rangeClosed(0, 1).mapToObj( + a -> new Object[] { o[0], o[1], o[2], a }) + ).flatMap(o -> + Stream.of(PositionInManifest.values()).map( + i -> new Object[] { o[0], o[1], o[2], o[3], i }) + ).map(o -> { + int b = (int) o[0]; + int c = (int) o[1]; + int p = (int) o[2]; + int a = (int) o[3]; + PositionInManifest i = (PositionInManifest) o[4]; + + // offset: so many characters (actually bytes here, because filled + // with one byte characters) are needed to place the next character + // (the character under test) into a position relative to the + // maximum line width that it may or may not have to be broken onto + // the next line + int offset = + // number of lines; - 1 due to continuation " " + b * (MANIFEST_LINE_CONTENT_WIDTH_BYTES - 1) + // first line available content space (after "Name: ") + + MANIFEST_LINE_CONTENT_WIDTH_BYTES - 6 + // position of line width limit relative to tested character + - p; + + String value = ""; + value += FILL1BYTE.repeat(offset - 1); + // character before the one to test the break + value += MARK_BEFORE; + String character = numByteUnicodeCharacter(c); + value += character; + // (optional) character after the one to test the break + value += MARK_AFTER.repeat(a); + + return new Object[] { b, c, p, a, i, character, value }; + }).toArray(size -> new Object[size][]); + } + + /** + * Checks that unicode characters work well with line breaks and + * continuation lines in jar manifests without breaking a character across + * a line break even when encoded in UTF-8 with more than one byte. + * <p> + * For each of the cases provided by {@link #lineBreakParameters()} the + * break position is verified in the written manifest binary form as well + * as verified that it restores to the original values when read again. + * <p> + * As an additional check, the binary manifests are decoded from UTF-8 + * into strings before re-joining continued lines. + */ + @Test(dataProvider = "lineBreakParameters") + public void testWriteLineBreaksKeepCharactersTogether(int b, int c, int p, + int a, PositionInManifest i, String character, String value) + throws Exception { + // in order to unambiguously establish the position of "character" in + // brokenPart, brokenPart is prepended and appended with what is + // expected before and after it... + String brokenPart = MARK_BEFORE; + + // expect the whole character on the next line unless it fits + // completely on the current line + boolean breakExpectedBeforeCharacter = p < c; + if (breakExpectedBeforeCharacter) { + brokenPart += "\r\n "; + } + brokenPart += character; + // expect a line break before the next character if there is a next + // character and the previous not already broken on next line + if (a > 0) { + if (!breakExpectedBeforeCharacter) { + brokenPart += "\r\n "; + } + brokenPart += MARK_AFTER; + } + brokenPart = brokenPart + "\r\n"; + + byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, value); + try { + assertOccurrence(mfBytes, brokenPart.getBytes(UTF_8)); + readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value); + decodeManifestAsUtf8AndAssertValue( + mfBytes, FOUR_BYTE_NAME, value, true); + } catch (AssertionError e) { + Utils.echoManifest(mfBytes, "faulty manifest: " + e); + throw e; + } + } + + static byte[] writeManifest(PositionInManifest i, String name, + String value) throws IOException { + Manifest mf = new Manifest(); + mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0"); + + switch (i) { + case MAIN_ATTRIBUTES: + mf.getMainAttributes().put(new Name(name), value); + break; + case SECTION_NAME: + mf.getEntries().put(value, new Attributes()); + break; + case NAMED_SECTION: + Attributes attributes = new Attributes(); + mf.getEntries().put(FOUR_BYTE_NAME, attributes); + attributes.put(new Name(name), value); + break; + } + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + mf.write(out); + return out.toByteArray(); + } + + /** + * Asserts one and only one occurrence of a sequence of bytes {@code part} + * representing the character and how it is expected to be broken and its + * surrounding bytes in a larger sequence that corresponds to the manifest + * in binary form {@code mf}. + */ + static void assertOccurrence(byte[] mf, byte[] part) { + List<Integer> matchPos = new LinkedList<>(); + for (int i = 0; i < mf.length; i++) { + for (int j = 0; j < part.length && i + j <= mf.length; j++) { + if (part[j] == 0) { + if (i + j != mf.length) { + break; // expected eof not found + } + } else if (i + j == mf.length) { + break; + } else if (mf[i + j] != part[j]) { + break; + } + if (j == part.length - 1) { + matchPos.add(i); + } + } + } + assertEquals(matchPos.size(), 1, "not " + + (matchPos.size() < 1 ? "found" : "unique") + ": '" + + new String(part, UTF_8) + "'"); + } + + static void readManifestAndAssertValue( + byte[] mfBytes, PositionInManifest i, String name, String value) + throws IOException { + Manifest mf = new Manifest(new ByteArrayInputStream(mfBytes)); + + switch (i) { + case MAIN_ATTRIBUTES: + assertEquals(mf.getMainAttributes().getValue(name), value, + "main attributes header value"); + break; + case SECTION_NAME: + Attributes attributes = mf.getAttributes(value); + assertNotNull(attributes, "named section not found"); + break; + case NAMED_SECTION: + attributes = mf.getAttributes(FOUR_BYTE_NAME); + assertEquals(attributes.getValue(name), value, + "named section attributes header value"); + break; + } + } + + /** + * Decodes a binary manifest {@code mfBytes} into UTF-8 first, before + * joining the continuation lines unlike {@link Manifest} and + * {@link Attributes} which join the continuation lines first, before + * decoding the joined line from UTF-8 into a {@link String}, evaluating + * whether or not the binary manifest is valid UTF-8. + */ + static void decodeManifestAsUtf8AndAssertValue( + byte[] mfBytes, String name, String value, + boolean validUtf8ManifestExpected) throws IOException { + String mf = new String(mfBytes, UTF_8) + .replaceAll("(\\r\\n|(?!\\r)\\n|\\r(?!\\n)) ", ""); + String header = "\r\n" + name + ": " + value + "\r\n"; + int pos = mf.indexOf(header); + if (validUtf8ManifestExpected) { + assertTrue(pos > 0); + pos = mf.indexOf(header, pos + 1); + } + // assert no ocurrence or no other occurrence after one match above + assertTrue(pos == -1); + } + + @Test(dataProvider = "lineBreakParameters") + public void readCharactersBrokenAcrossLines(int b, int c, int p, int a, + PositionInManifest i, String character, String value) + throws Exception { + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + buf.write(MARK_BEFORE.getBytes(UTF_8)); + byte[] characterBytes = character.getBytes(UTF_8); + // the portion of the character that fits on the current line before + // a break at 72 bytes, ranges from nothing (p == 0) to the whole + // character (p == c) + for (int j = 0; j < p; j++) { + buf.write(characterBytes, j, 1); + } + // expect a line break at exactly 72 bytes from the beginning of the + // line unless the whole character fits on that line + boolean breakExpectedBeforeCharacter = p < c; + if (breakExpectedBeforeCharacter) { + buf.write("\r\n ".getBytes(UTF_8)); + } + // the remaining portion of the character, if any + for (int j = p; j < c; j++) { + buf.write(characterBytes, j, 1); + } + // expect another line break if the whole character fitted on the same + // line and there is another character + if (a == 1) { + if (c == p) { + buf.write("\r\n ".getBytes(UTF_8)); + } + buf.write(MARK_AFTER.getBytes(UTF_8)); + } + // expect a line break immediately after the value + buf.write("\r\n".getBytes(UTF_8)); + byte[] brokenPart = buf.toByteArray(); + + byte[] mfBytes = writeManifestWithBrokenCharacters( + i, FOUR_BYTE_NAME, value); + try { + assertOccurrence(mfBytes, brokenPart); + readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value); + decodeManifestAsUtf8AndAssertValue( + mfBytes, FOUR_BYTE_NAME, value, p == 0 || p == c); + } catch (AssertionError e) { + Utils.echoManifest(mfBytes, "faulty manifest: " + e); + throw e; + } + } + + /** + * From the previous {@link Manifest} implementation reduced to the minimum + * required to demonstrate compatibility. + */ + @SuppressWarnings("deprecation") + static byte[] writeManifestWithBrokenCharacters( + PositionInManifest i, String name, String value) + throws IOException { + byte[] vb = value.getBytes(UTF_8); + value = new String(vb, 0, 0, vb.length); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(out); + dos.writeBytes(Name.MANIFEST_VERSION + ": 0.1\r\n"); + + if (i == PositionInManifest.MAIN_ATTRIBUTES) { + StringBuffer buffer = new StringBuffer(name); + buffer.append(": "); + buffer.append(value); + make72Safe(buffer); + buffer.append("\r\n"); + dos.writeBytes(buffer.toString()); + } + dos.writeBytes("\r\n"); + + if (i == PositionInManifest.SECTION_NAME || + i == PositionInManifest.NAMED_SECTION) { + StringBuffer buffer = new StringBuffer("Name: "); + if (i == PositionInManifest.SECTION_NAME) { + buffer.append(value); + } else { + buffer.append(FOUR_BYTE_NAME); + } + make72Safe(buffer); + buffer.append("\r\n"); + dos.writeBytes(buffer.toString()); + + if (i == PositionInManifest.NAMED_SECTION) { + buffer = new StringBuffer(name); + buffer.append(": "); + buffer.append(value); + make72Safe(buffer); + buffer.append("\r\n"); + dos.writeBytes(buffer.toString()); + } + + dos.writeBytes("\r\n"); + } + + dos.flush(); + return out.toByteArray(); + } + + /** + * Adds line breaks to enforce a maximum 72 bytes per line. + * <p> + * From previous Manifest implementation without respect for UTF-8 encoded + * character boundaries breaking also within multi-byte UTF-8 encoded + * characters. + * + * @see {@link Manifest#make72Safe} + */ + static void make72Safe(StringBuffer line) { + int length = line.length(); + int index = 72; + while (index < length) { + line.insert(index, "\r\n "); + index += 74; // + line width + line break ("\r\n") + length += 3; // + line break ("\r\n") and space + } + } + + @Test + public void testEmptyValue() throws Exception { + for (PositionInManifest i : PositionInManifest.values()) { + byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, ""); + readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, ""); + } + } + +} diff -r fb44f597981a test/jdk/java/util/jar/Manifest/PrintLine72.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/jdk/java/util/jar/Manifest/PrintLine72.java Wed Mar 25 19:49:15 2020 +0100 @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.jar.Manifest; +import java.util.jar.Attributes.Name; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.testng.annotations.*; +import static org.testng.Assert.*; + +/** + * @test + * @compile ../../../../sun/security/tools/jarsigner/Utils.java + * @bug 6202130 + * @run testng PrintLine72 + * @summary Tests {@link Manifest#println72} line breaking with some particular + * border case kind of test cases involving combining character sequence + * grapheme clusters. + * <p> + * For another test covering the complete Unicode character set see + * {@link ValueUtf8Coding} and for a test for not breaking Unicode code point + * UTF-8 encoded byte sequences see {@link PrintChar72}. + */ +public class PrintLine72 { + + static final Name TEST_NAME = new Name("test"); + + static final int NAME_SEP_LENGTH = (TEST_NAME + ": ").length(); + + void test(String originalValue, int... breakPositionsBytes) + throws IOException { + String expectedValueWithBreaksInManifest = originalValue; + // iterating backwards because inserting breaks affects positions + for (int i = breakPositionsBytes.length - 1; i >= 0; i--) { + int breakPositionBytes = breakPositionsBytes[i]; + + // Translate breakPositionBytes byte offset into + // breakPositionCharacters (primitive char type) character offset + // for cutting the string with String.substring lateron. + // Higher code points may be represented with two UTF-16 surrogate + // pair characters which both count for String.substring. + int bytesSoFar = 0; + int charsSoFar = 0; + while (bytesSoFar < breakPositionBytes) { + bytesSoFar += expectedValueWithBreaksInManifest + .substring(charsSoFar, charsSoFar + 1) + .getBytes(UTF_8).length; + charsSoFar++; + assertTrue(bytesSoFar <= breakPositionBytes, + "break position not aligned with characters"); + } + int breakPositionCharacters = charsSoFar; + + expectedValueWithBreaksInManifest = + expectedValueWithBreaksInManifest + .substring(0, breakPositionCharacters) + + "\r\n " + + expectedValueWithBreaksInManifest + .substring(breakPositionCharacters); + } + + Manifest mf = new Manifest(); + mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0"); + mf.getMainAttributes().put(TEST_NAME, originalValue); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + mf.write(out); + byte[] mfBytes = out.toByteArray(); + + byte[] actual = mfBytes; + String expected = + "Manifest-Version: 1.0\r\n" + + TEST_NAME + ": " + expectedValueWithBreaksInManifest + + "\r\n\r\n"; + try { + assertEquals(new String(actual, UTF_8), expected); + assertEquals(actual, expected.getBytes(UTF_8)); + } catch (AssertionError e) { + Utils.echoManifest(mfBytes, "faulty manifest: " + e); + System.out.println("actual = " + byteArrayToIntList(actual)); + System.out.println("expected = " + byteArrayToIntList( + expected.getBytes(UTF_8))); + throw e; + } + } + + static List<Integer> byteArrayToIntList(byte[] bytes) { + List<Integer> list = new ArrayList<>(); + for (int i = 0; i < bytes.length; i++) { + list.add((int) bytes[i]); + } + return list; + } + + @Test + public void testBreakAlignmentSelfTest() throws Exception { + assertThrows(AssertionError.class, () -> test(getCharSeq(2), 1)); + assertThrows(AssertionError.class, () -> test("\uD841\uDF0E", 2)); + } + + @Test + public void testEmpty() throws Exception { + test(""); // expect neither a line break nor an exception + } + + static final String COMBINING_DIACRITICAL_MARKS = + IntStream.range(0x300, 0x36F) + .mapToObj(i -> new String(Character.toChars(i))) + .collect(Collectors.joining()); + + static String getCharSeq(int numberOfBytes) { + String seq = (numberOfBytes % 2 == 1 ? "e" : "\u00E6") + + COMBINING_DIACRITICAL_MARKS.substring(0, (numberOfBytes - 1) / 2); + assertEquals(seq.getBytes(UTF_8).length, numberOfBytes); + return seq; + } + + @Test + public void testBreakOnFirstLine() throws Exception { + // Combining sequence starts immediately after name and ": " and fits + // the remaining space in the first line. Expect no break. + test(getCharSeq(66)); + + // Combining sequence starts after name and ": " and exceeds the + // remaining space in the first line by one byte. Expect to break on a + // new line because the combining sequence fits on a continuation line + // which does not start with name and ": " and provides enough space. + test(getCharSeq(67), 0); + + // Combining sequence starts after name and ": " and exceeds the + // remaining space in the first line but still just fits exactly on a + // continuation line. Expect the value to break onto a new line. + test(getCharSeq(71), 0); + + // Combining sequence starts after name and ": " and exceeds the + // remaining space in the first line and neither fits on a continuation + // line. Expect that the first line to be filled with as many codepoints + // as fit on it and expect a line break onto a continuation line after + // 66 bytes of the first line value. + test(getCharSeq(72), 72 - NAME_SEP_LENGTH); + + // Combining sequence starts after name and ": x" and exceeds the + // remaining space in the first line and neither fits on a continuation + // line. Expect that the first line to be filled with as many codepoints + // as fit on it and expect a line break onto a continuation line already + // after 65 bytes of the first line because the following character is + // a code point represented with two bytes in UTF-8 which should not + // be interrupted with a line break. + test("x" + getCharSeq(72), 72 - NAME_SEP_LENGTH - 1); + } + + @Test + public void testBreakOnContinuationLine() throws Exception { + final int FIRST_LINE_FREE_SPACE = 72 - NAME_SEP_LENGTH; // bytes + final String FILL_FIRST_LINE = "x".repeat(FIRST_LINE_FREE_SPACE); + + // fits on next line by skipping one byte free on current line + test(FILL_FIRST_LINE + "x".repeat(71 - 1) + getCharSeq(71), + FIRST_LINE_FREE_SPACE, + FIRST_LINE_FREE_SPACE + 71 - 1); + + // fits on current line exactly + test(FILL_FIRST_LINE + "x".repeat(71) + getCharSeq(71), + FIRST_LINE_FREE_SPACE, + FIRST_LINE_FREE_SPACE + 71); + + // fits on next line by inserting a line break after a line that + // contains only one character yet + test(FILL_FIRST_LINE + "x".repeat(71 + 1) + getCharSeq(71), + FIRST_LINE_FREE_SPACE, + FIRST_LINE_FREE_SPACE + 71, + FIRST_LINE_FREE_SPACE + 71 + 1); + + // does not fit on the next line and the one byte not yet used on the + // current line does not hold the first code point of the combined + // character sequence which is a code point encoded with two bytes in + // UTF-8. + assertTrue(getCharSeq(72).substring(0, 1).getBytes(UTF_8).length == 2); + test(FILL_FIRST_LINE + "x".repeat(71 - 1) + getCharSeq(72), + FIRST_LINE_FREE_SPACE, + FIRST_LINE_FREE_SPACE + 71 - 1, + FIRST_LINE_FREE_SPACE + 71 - 1 + 71 - 1); + + // would not fit on the next line alone but fits on the remaining two + // bytes available on the current line and the whole subsequent line. + test(FILL_FIRST_LINE + "x".repeat(71 - 2) + getCharSeq(72), + FIRST_LINE_FREE_SPACE, + FIRST_LINE_FREE_SPACE + 71); + + // previous character filled the whole previous line completely + // and the combined character sequence with 72 bytes still does not fit + // on a single line. the last code point is a two byte one so that an + // unused byte is left unused on the second last line. + assertTrue(getCharSeq(72).length() == 36); // primitive chars + assertTrue(getCharSeq(72).substring(35).getBytes(UTF_8).length == 2); + test(FILL_FIRST_LINE + "x".repeat(71) + getCharSeq(72), + FIRST_LINE_FREE_SPACE, + FIRST_LINE_FREE_SPACE + 71, + FIRST_LINE_FREE_SPACE + 71 + 71 - 1); + + // previous character left one byte used on the current line and 70 + // bytes remaining available. the combining sequence can use all of + // these 70 bytes because after 70 bytes a new code point starts + test(FILL_FIRST_LINE + "x".repeat(71 + 1) + getCharSeq(72), + FIRST_LINE_FREE_SPACE, + FIRST_LINE_FREE_SPACE + 71, + FIRST_LINE_FREE_SPACE + 71 + 71); + } + +} diff -r fb44f597981a test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java --- a/test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java Wed Mar 25 09:21:46 2020 -0700 +++ b/test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java Wed Mar 25 19:49:15 2020 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -21,32 +21,49 @@ * questions. */ -/* +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.stream.Collectors; +import java.util.jar.Attributes.Name; +import java.util.jar.Manifest; +import java.util.jar.JarFile; +import java.util.jar.JarEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipEntry; + +import static java.util.jar.JarFile.MANIFEST_NAME; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.file.StandardCopyOption.COPY_ATTRIBUTES; +import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; + +import jdk.test.lib.SecurityTools; +import jdk.test.lib.util.JarUtils; + +/** * @test - * @bug 6695402 + * @bug 6695402 6202130 * @summary verify signatures of jars containing classes with names * with multi-byte unicode characters broken across lines * @library /test/lib */ - -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.jar.JarFile; -import java.util.jar.Attributes.Name; -import java.util.jar.JarEntry; - -import static java.nio.charset.StandardCharsets.UTF_8; - -import jdk.test.lib.SecurityTools; -import jdk.test.lib.util.JarUtils; - +/* + * Note to future maintainers: + * The case this test here (LineBrokenMultiByteCharacter) verifies has been + * addressed in bug 8217375 with a much more general approach than with bug + * 6695402 and bug 8217375 came with its own much broader set of tests so that + * now after resolution of bug 8217375, this test here might possibly not worth + * to maintain any further. + */ public class LineBrokenMultiByteCharacter { /** * this name will break across lines in MANIFEST.MF at the - * middle of a two-byte utf-8 encoded character due to its e acute letter + * middle of a two-byte UTF-8 encoded character due to its e acute letter * at its exact position. * * because no file with such a name exists {@link JarUtils} will add the @@ -63,53 +80,58 @@ static final String anotherName = "LineBrokenMultiByteCharacterA1234567890B1234567890C123456789D1234567890.class"; - static final String alias = "a"; - static final String keystoreFileName = "test.jks"; - static final String manifestFileName = "MANIFEST.MF"; + static final String ALIAS = "A"; + static final String KEYSTORE_FILENAME = "test.jks"; + static final String SOME_OTHER_SIG_FILE = "META-INF/FAKE_SIG.DSA"; public static void main(String[] args) throws Exception { prepare(); testSignJar("test.jar"); - testSignJarNoManifest("test-no-manifest.jar"); testSignJarUpdate("test-update.jar", "test-updated.jar"); } static void prepare() throws Exception { - SecurityTools.keytool("-keystore", keystoreFileName, "-genkeypair", + SecurityTools.keytool("-keystore", KEYSTORE_FILENAME, "-genkeypair", "-keyalg", "dsa", "-storepass", "changeit", "-keypass", "changeit", "-storetype", - "JKS", "-alias", alias, "-dname", "CN=X", "-validity", "366") + "JKS", "-alias", ALIAS, "-dname", "CN=X", "-validity", "366") .shouldHaveExitValue(0); - Files.write(Paths.get(manifestFileName), (Name. + new File(MANIFEST_NAME).getParentFile().mkdirs(); + Files.write(Paths.get(MANIFEST_NAME), (Name. MANIFEST_VERSION.toString() + ": 1.0\r\n").getBytes(UTF_8)); + + // prevent jarsigner from assuming it was safe to rewrite the manifest + // and its line breaks assuming there were no other signatures present + Files.write(Paths.get(SOME_OTHER_SIG_FILE), new byte[] {}); } static void testSignJar(String jarFileName) throws Exception { - JarUtils.createJar(jarFileName, manifestFileName, testClassName); - verifyJarSignature(jarFileName); - } - - static void testSignJarNoManifest(String jarFileName) throws Exception { - JarUtils.createJar(jarFileName, testClassName); + JarUtils.createJar(jarFileName, testClassName, SOME_OTHER_SIG_FILE); + createManifestEntries(jarFileName); + rebreakManifest72bytes(jarFileName); verifyJarSignature(jarFileName); } static void testSignJarUpdate( String initialFileName, String updatedFileName) throws Exception { - JarUtils.createJar(initialFileName, manifestFileName, anotherName); - SecurityTools.jarsigner("-keystore", keystoreFileName, "-storetype", + JarUtils.createJar(initialFileName, testClassName, anotherName, + SOME_OTHER_SIG_FILE); + createManifestEntries(initialFileName); + rebreakManifest72bytes(initialFileName); + removeJarEntry(initialFileName, testClassName); + SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, "-storetype", "JKS", "-storepass", "changeit", "-debug", initialFileName, - alias).shouldHaveExitValue(0); + ALIAS).shouldHaveExitValue(0); JarUtils.updateJar(initialFileName, updatedFileName, testClassName); verifyJarSignature(updatedFileName); } static void verifyJarSignature(String jarFileName) throws Exception { // actually sign the jar - SecurityTools.jarsigner("-keystore", keystoreFileName, "-storetype", - "JKS", "-storepass", "changeit", "-debug", jarFileName, alias) + SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, "-storetype", + "JKS", "-storepass", "changeit", "-debug", jarFileName, ALIAS) .shouldHaveExitValue(0); try ( @@ -130,7 +152,7 @@ * the signature file does not even contain the desired entry at all. * * this relies on {@link java.util.jar.Manifest} breaking lines unaware - * of bytes that belong to the same multi-byte utf characters. + * of bytes that belong to the same multi-byte UTF-8 encoded characters. */ static void verifyClassNameLineBroken(JarFile jar, String className) throws IOException { @@ -142,7 +164,7 @@ throw new AssertionError(className + " not found in manifest"); } - JarEntry manifestEntry = jar.getJarEntry(JarFile.MANIFEST_NAME); + JarEntry manifestEntry = jar.getJarEntry(MANIFEST_NAME); try ( InputStream manifestIs = jar.getInputStream(manifestEntry); ) { @@ -159,7 +181,7 @@ } if (bytesMatched < eAcuteBroken.length) { throw new AssertionError("self-test failed: multi-byte " - + "utf-8 character not broken across lines"); + + "UTF-8 encoded character not broken across lines"); } } } @@ -183,4 +205,108 @@ } } + static void createManifestEntries(String jarFileName) throws Exception { + JarUtils.updateJarFile(Paths.get(jarFileName), + Paths.get("."), Paths.get(MANIFEST_NAME)); + SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, + "-storepass", "changeit", "-debug", jarFileName, ALIAS) + .shouldHaveExitValue(0); + // remove the signature files, only manifest is used + removeJarEntry(jarFileName, + "META-INF/" + ALIAS + ".SF", + "META-INF/" + ALIAS + ".DSA"); + } + + @SuppressWarnings("deprecation") + static void removeJarEntry(String jarFileName, String... entryNames) + throws IOException { + String aCopy = "swap-" + jarFileName; + JarUtils.updateJar(jarFileName, aCopy, Arrays.asList(entryNames) + .stream().collect(Collectors.toMap(e -> e, e -> false))); + Files.copy(Paths.get(aCopy), Paths.get(jarFileName), + COPY_ATTRIBUTES, REPLACE_EXISTING); + Files.delete(Paths.get(aCopy)); + } + + static void rebreakManifest72bytes(String jarFileName) throws Exception { + byte[] manifest; + try (ZipFile zip = new ZipFile(jarFileName)) { + ZipEntry zipEntry = zip.getEntry(MANIFEST_NAME); + manifest = zip.getInputStream(zipEntry).readAllBytes(); + } + Utils.echoManifest(manifest, MANIFEST_NAME + " before re-break:"); + byte[] manifest72 = rebreak72bytes(manifest); + Utils.echoManifest(manifest72, MANIFEST_NAME + " after re-break:"); + String aCopy = "swap-" + jarFileName; + JarUtils.updateManifest(jarFileName, aCopy, new Manifest() { @Override + public void write(OutputStream out) throws IOException { + out.write(manifest72); + } + }); + Files.copy(Paths.get(aCopy), Paths.get(jarFileName), + COPY_ATTRIBUTES, REPLACE_EXISTING); + Files.delete(Paths.get(aCopy)); + } + + /** + * Simulates a jar manifest as it would have been created by an earlier + * JDK by re-arranging the line break at exactly 72 bytes content thereby + * breaking the multi-byte UTF-8 encoded character under test like before + * resolution of bug 6202130. + * <p> + * The returned manifest is accepted as unmodified by + * {@link jdk.security.jarsigner.JarSigner#updateDigests + * (ZipEntry,ZipFile,MessageDigest[],Manifest)} on line 985: + * <pre> + * if (!mfDigest.equalsIgnoreCase(base64Digests[i])) { + * </pre> + * and therefore left unchanged when the jar is signed and also signature + * verification will check it. + */ + static byte[] rebreak72bytes(byte[] mf0) { + byte[] mf1 = new byte[mf0.length]; + int c0 = 0, c1 = 0; // bytes since last line start + for (int i0 = 0, i1 = 0; i0 < mf0.length; i0++, i1++) { + switch (mf0[i0]) { + case '\r': + if (i0 + 2 < mf0.length && + mf0[i0 + 1] == '\n' && mf0[i0 + 2] == ' ') { + // skip line break + i0 += 2; + i1 -= 1; + } else { + mf1[i1] = mf0[i0]; + c0 = c1 = 0; + } + break; + case '\n': + if (i0 + 1 < mf0.length && mf0[i0 + 1] == ' ') { + // skip line break + i0 += 1; + i1 -= 1; + } else { + mf1[i1] = mf0[i0]; + c0 = c1 = 0; + } + break; + case ' ': + if (c0 == 0) { + continue; + } + default: + c0++; + if (c1 == 72) { + mf1[i1++] = '\r'; + mf1[i1++] = '\n'; + mf1[i1++] = ' '; + c1 = 1; + } else { + c1++; + } + mf1[i1] = mf0[i0]; + } + } + return mf1; + } + }